mirror of https://github.com/torvalds/linux.git
namespace-6.19-rc1
-----BEGIN PGP SIGNATURE-----
iHUEABYKAB0WIQRAhzRXHqcMeLMyaSiRxhvAZXjcogUCaSmOZQAKCRCRxhvAZXjc
ooKwAP4kR5kMjHlthf8jHmmCjVU3nQFO9hUZsIQL9gFJLOIQMAD+LLoTaq1WJufl
oSgZpREXZVmI1TK61eR6EZMB1YikGAo=
=TExi
-----END PGP SIGNATURE-----
Merge tag 'namespace-6.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull namespace updates from Christian Brauner:
"This contains substantial namespace infrastructure changes including a new
system call, active reference counting, and extensive header cleanups.
The branch depends on the shared kbuild branch for -fms-extensions support.
Features:
- listns() system call
Add a new listns() system call that allows userspace to iterate
through namespaces in the system. This provides a programmatic
interface to discover and inspect namespaces, addressing
longstanding limitations:
Currently, there is no direct way for userspace to enumerate
namespaces. Applications must resort to scanning /proc/*/ns/ across
all processes, which is:
- Inefficient - requires iterating over all processes
- Incomplete - misses namespaces not attached to any running
process but kept alive by file descriptors, bind mounts, or
parent references
- Permission-heavy - requires access to /proc for many processes
- No ordering or ownership information
- No filtering per namespace type
The listns() system call solves these problems:
ssize_t listns(const struct ns_id_req *req, u64 *ns_ids,
size_t nr_ns_ids, unsigned int flags);
struct ns_id_req {
__u32 size;
__u32 spare;
__u64 ns_id;
struct /* listns */ {
__u32 ns_type;
__u32 spare2;
__u64 user_ns_id;
};
};
Features include:
- Pagination support for large namespace sets
- Filtering by namespace type (MNT_NS, NET_NS, USER_NS, etc.)
- Filtering by owning user namespace
- Permission checks respecting namespace isolation
- Active Reference Counting
Introduce an active reference count that tracks namespace
visibility to userspace. A namespace is visible in the following
cases:
- The namespace is in use by a task
- The namespace is persisted through a VFS object (namespace file
descriptor or bind-mount)
- The namespace is a hierarchical type and is the parent of child
namespaces
The active reference count does not regulate lifetime (that's still
done by the normal reference count) - it only regulates visibility
to namespace file handles and listns().
This prevents resurrection of namespaces that are pinned only for
internal kernel reasons (e.g., user namespaces held by
file->f_cred, lazy TLB references on idle CPUs, etc.) which should
not be accessible via (1)-(3).
- Unified Namespace Tree
Introduce a unified tree structure for all namespaces with:
- Fixed IDs assigned to initial namespaces
- Lookup based solely on inode number
- Maintained list of owned namespaces per user namespace
- Simplified rbtree comparison helpers
Cleanups
- Header Reorganization:
- Move namespace types into separate header (ns_common_types.h)
- Decouple nstree from ns_common header
- Move nstree types into separate header
- Switch to new ns_tree_{node,root} structures with helper functions
- Use guards for ns_tree_lock
- Initial Namespace Reference Count Optimization
- Make all reference counts on initial namespaces a nop to avoid
pointless cacheline ping-pong for namespaces that can never go
away
- Drop custom reference count initialization for initial namespaces
- Add NS_COMMON_INIT() macro and use it for all namespaces
- pid: rely on common reference count behavior
- Miscellaneous Cleanups
- Rename exit_task_namespaces() to exit_nsproxy_namespaces()
- Rename is_initial_namespace() and make argument const
- Use boolean to indicate anonymous mount namespace
- Simplify owner list iteration in nstree
- nsfs: raise SB_I_NODEV, SB_I_NOEXEC, and DCACHE_DONTCACHE explicitly
- nsfs: use inode_just_drop()
- pidfs: raise DCACHE_DONTCACHE explicitly
- pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls
- libfs: allow to specify s_d_flags
- cgroup: add cgroup namespace to tree after owner is set
- nsproxy: fix free_nsproxy() and simplify create_new_namespaces()
Fixes:
- setns(pidfd, ...) race condition
Fix a subtle race when using pidfds with setns(). When the target
task exits after prepare_nsset() but before commit_nsset(), the
namespace's active reference count might have been dropped. If
setns() then installs the namespaces, it would bump the active
reference count from zero without taking the required reference on
the owner namespace, leading to underflow when later decremented.
The fix resurrects the ownership chain if necessary - if the caller
succeeded in grabbing passive references, the setns() should
succeed even if the target task exits or gets reaped.
- Return EFAULT on put_user() error instead of success
- Make sure references are dropped outside of RCU lock (some
namespaces like mount namespace sleep when putting the last
reference)
- Don't skip active reference count initialization for network
namespace
- Add asserts for active refcount underflow
- Add asserts for initial namespace reference counts (both passive
and active)
- ipc: enable is_ns_init_id() assertions
- Fix kernel-doc comments for internal nstree functions
- Selftests
- 15 active reference count tests
- 9 listns() functionality tests
- 7 listns() permission tests
- 12 inactive namespace resurrection tests
- 3 threaded active reference count tests
- commit_creds() active reference tests
- Pagination and stress tests
- EFAULT handling test
- nsid tests fixes"
* tag 'namespace-6.19-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs: (103 commits)
pidfs: simplify PIDFD_GET_<type>_NAMESPACE ioctls
nstree: fix kernel-doc comments for internal functions
nsproxy: fix free_nsproxy() and simplify create_new_namespaces()
selftests/namespaces: fix nsid tests
ns: drop custom reference count initialization for initial namespaces
pid: rely on common reference count behavior
ns: add asserts for initial namespace active reference counts
ns: add asserts for initial namespace reference counts
ns: make all reference counts on initial namespace a nop
ipc: enable is_ns_init_id() assertions
fs: use boolean to indicate anonymous mount namespace
ns: rename is_initial_namespace()
ns: make is_initial_namespace() argument const
nstree: use guards for ns_tree_lock
nstree: simplify owner list iteration
nstree: switch to new structures
nstree: add helper to operate on struct ns_tree_{node,root}
nstree: move nstree types into separate header
nstree: decouple from ns_common header
ns: move namespace types into separate header
...
This commit is contained in:
commit
415d34b92c
|
|
@ -509,3 +509,4 @@
|
||||||
577 common open_tree_attr sys_open_tree_attr
|
577 common open_tree_attr sys_open_tree_attr
|
||||||
578 common file_getattr sys_file_getattr
|
578 common file_getattr sys_file_getattr
|
||||||
579 common file_setattr sys_file_setattr
|
579 common file_setattr sys_file_setattr
|
||||||
|
580 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -484,3 +484,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -481,3 +481,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -469,3 +469,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -475,3 +475,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -408,3 +408,4 @@
|
||||||
467 n32 open_tree_attr sys_open_tree_attr
|
467 n32 open_tree_attr sys_open_tree_attr
|
||||||
468 n32 file_getattr sys_file_getattr
|
468 n32 file_getattr sys_file_getattr
|
||||||
469 n32 file_setattr sys_file_setattr
|
469 n32 file_setattr sys_file_setattr
|
||||||
|
470 n32 listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -384,3 +384,4 @@
|
||||||
467 n64 open_tree_attr sys_open_tree_attr
|
467 n64 open_tree_attr sys_open_tree_attr
|
||||||
468 n64 file_getattr sys_file_getattr
|
468 n64 file_getattr sys_file_getattr
|
||||||
469 n64 file_setattr sys_file_setattr
|
469 n64 file_setattr sys_file_setattr
|
||||||
|
470 n64 listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -457,3 +457,4 @@
|
||||||
467 o32 open_tree_attr sys_open_tree_attr
|
467 o32 open_tree_attr sys_open_tree_attr
|
||||||
468 o32 file_getattr sys_file_getattr
|
468 o32 file_getattr sys_file_getattr
|
||||||
469 o32 file_setattr sys_file_setattr
|
469 o32 file_setattr sys_file_setattr
|
||||||
|
470 o32 listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -468,3 +468,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -560,3 +560,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -472,3 +472,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -473,3 +473,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -515,3 +515,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -475,3 +475,4 @@
|
||||||
467 i386 open_tree_attr sys_open_tree_attr
|
467 i386 open_tree_attr sys_open_tree_attr
|
||||||
468 i386 file_getattr sys_file_getattr
|
468 i386 file_getattr sys_file_getattr
|
||||||
469 i386 file_setattr sys_file_setattr
|
469 i386 file_setattr sys_file_setattr
|
||||||
|
470 i386 listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -394,6 +394,7 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
||||||
#
|
#
|
||||||
# Due to a historical design error, certain syscalls are numbered differently
|
# Due to a historical design error, certain syscalls are numbered differently
|
||||||
|
|
|
||||||
|
|
@ -440,3 +440,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -680,6 +680,7 @@ static int pseudo_fs_fill_super(struct super_block *s, struct fs_context *fc)
|
||||||
s->s_export_op = ctx->eops;
|
s->s_export_op = ctx->eops;
|
||||||
s->s_xattr = ctx->xattr;
|
s->s_xattr = ctx->xattr;
|
||||||
s->s_time_gran = 1;
|
s->s_time_gran = 1;
|
||||||
|
s->s_d_flags |= ctx->s_d_flags;
|
||||||
root = new_inode(s);
|
root = new_inode(s);
|
||||||
if (!root)
|
if (!root)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
|
||||||
|
|
@ -27,6 +27,7 @@ struct mnt_namespace {
|
||||||
unsigned int nr_mounts; /* # of mounts in the namespace */
|
unsigned int nr_mounts; /* # of mounts in the namespace */
|
||||||
unsigned int pending_mounts;
|
unsigned int pending_mounts;
|
||||||
refcount_t passive; /* number references not pinning @mounts */
|
refcount_t passive; /* number references not pinning @mounts */
|
||||||
|
bool is_anon;
|
||||||
} __randomize_layout;
|
} __randomize_layout;
|
||||||
|
|
||||||
struct mnt_pcp {
|
struct mnt_pcp {
|
||||||
|
|
@ -175,7 +176,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry)
|
||||||
|
|
||||||
static inline bool is_anon_ns(struct mnt_namespace *ns)
|
static inline bool is_anon_ns(struct mnt_namespace *ns)
|
||||||
{
|
{
|
||||||
return ns->ns.ns_id == 0;
|
return ns->is_anon;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool anon_ns_root(const struct mount *m)
|
static inline bool anon_ns_root(const struct mount *m)
|
||||||
|
|
|
||||||
|
|
@ -4090,8 +4090,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
|
||||||
dec_mnt_namespaces(ucounts);
|
dec_mnt_namespaces(ucounts);
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
}
|
}
|
||||||
if (!anon)
|
ns_tree_gen_id(new_ns);
|
||||||
ns_tree_gen_id(&new_ns->ns);
|
|
||||||
|
new_ns->is_anon = anon;
|
||||||
refcount_set(&new_ns->passive, 1);
|
refcount_set(&new_ns->passive, 1);
|
||||||
new_ns->mounts = RB_ROOT;
|
new_ns->mounts = RB_ROOT;
|
||||||
init_waitqueue_head(&new_ns->poll);
|
init_waitqueue_head(&new_ns->poll);
|
||||||
|
|
@ -5982,11 +5983,8 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
|
||||||
}
|
}
|
||||||
|
|
||||||
struct mnt_namespace init_mnt_ns = {
|
struct mnt_namespace init_mnt_ns = {
|
||||||
.ns.inum = ns_init_inum(&init_mnt_ns),
|
.ns = NS_COMMON_INIT(init_mnt_ns),
|
||||||
.ns.ops = &mntns_operations,
|
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
.ns.__ns_ref = REFCOUNT_INIT(1),
|
|
||||||
.ns.ns_type = ns_common_type(&init_mnt_ns),
|
|
||||||
.passive = REFCOUNT_INIT(1),
|
.passive = REFCOUNT_INIT(1),
|
||||||
.mounts = RB_ROOT,
|
.mounts = RB_ROOT,
|
||||||
.poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),
|
.poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll),
|
||||||
|
|
|
||||||
101
fs/nsfs.c
101
fs/nsfs.c
|
|
@ -58,6 +58,8 @@ const struct dentry_operations ns_dentry_operations = {
|
||||||
static void nsfs_evict(struct inode *inode)
|
static void nsfs_evict(struct inode *inode)
|
||||||
{
|
{
|
||||||
struct ns_common *ns = inode->i_private;
|
struct ns_common *ns = inode->i_private;
|
||||||
|
|
||||||
|
__ns_ref_active_put(ns);
|
||||||
clear_inode(inode);
|
clear_inode(inode);
|
||||||
ns->ops->put(ns);
|
ns->ops->put(ns);
|
||||||
}
|
}
|
||||||
|
|
@ -408,6 +410,7 @@ static const struct super_operations nsfs_ops = {
|
||||||
.statfs = simple_statfs,
|
.statfs = simple_statfs,
|
||||||
.evict_inode = nsfs_evict,
|
.evict_inode = nsfs_evict,
|
||||||
.show_path = nsfs_show_path,
|
.show_path = nsfs_show_path,
|
||||||
|
.drop_inode = inode_just_drop,
|
||||||
};
|
};
|
||||||
|
|
||||||
static int nsfs_init_inode(struct inode *inode, void *data)
|
static int nsfs_init_inode(struct inode *inode, void *data)
|
||||||
|
|
@ -418,6 +421,16 @@ static int nsfs_init_inode(struct inode *inode, void *data)
|
||||||
inode->i_mode |= S_IRUGO;
|
inode->i_mode |= S_IRUGO;
|
||||||
inode->i_fop = &ns_file_operations;
|
inode->i_fop = &ns_file_operations;
|
||||||
inode->i_ino = ns->inum;
|
inode->i_ino = ns->inum;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Bring the namespace subtree back to life if we have to. This
|
||||||
|
* can happen when e.g., all processes using a network namespace
|
||||||
|
* and all namespace files or namespace file bind-mounts have
|
||||||
|
* died but there are still sockets pinning it. The SIOCGSKNS
|
||||||
|
* ioctl on such a socket will resurrect the relevant namespace
|
||||||
|
* subtree.
|
||||||
|
*/
|
||||||
|
__ns_ref_active_get(ns);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -458,6 +471,45 @@ static int nsfs_encode_fh(struct inode *inode, u32 *fh, int *max_len,
|
||||||
return FILEID_NSFS;
|
return FILEID_NSFS;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool is_current_namespace(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
switch (ns->ns_type) {
|
||||||
|
#ifdef CONFIG_CGROUPS
|
||||||
|
case CLONE_NEWCGROUP:
|
||||||
|
return current_in_namespace(to_cg_ns(ns));
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_IPC_NS
|
||||||
|
case CLONE_NEWIPC:
|
||||||
|
return current_in_namespace(to_ipc_ns(ns));
|
||||||
|
#endif
|
||||||
|
case CLONE_NEWNS:
|
||||||
|
return current_in_namespace(to_mnt_ns(ns));
|
||||||
|
#ifdef CONFIG_NET_NS
|
||||||
|
case CLONE_NEWNET:
|
||||||
|
return current_in_namespace(to_net_ns(ns));
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_PID_NS
|
||||||
|
case CLONE_NEWPID:
|
||||||
|
return current_in_namespace(to_pid_ns(ns));
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_TIME_NS
|
||||||
|
case CLONE_NEWTIME:
|
||||||
|
return current_in_namespace(to_time_ns(ns));
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_USER_NS
|
||||||
|
case CLONE_NEWUSER:
|
||||||
|
return current_in_namespace(to_user_ns(ns));
|
||||||
|
#endif
|
||||||
|
#ifdef CONFIG_UTS_NS
|
||||||
|
case CLONE_NEWUTS:
|
||||||
|
return current_in_namespace(to_uts_ns(ns));
|
||||||
|
#endif
|
||||||
|
default:
|
||||||
|
VFS_WARN_ON_ONCE(true);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
||||||
int fh_len, int fh_type)
|
int fh_len, int fh_type)
|
||||||
{
|
{
|
||||||
|
|
@ -483,18 +535,35 @@ static struct dentry *nsfs_fh_to_dentry(struct super_block *sb, struct fid *fh,
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!fid->ns_id)
|
||||||
|
return NULL;
|
||||||
|
/* Either both are set or both are unset. */
|
||||||
|
if (!fid->ns_inum != !fid->ns_type)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
scoped_guard(rcu) {
|
scoped_guard(rcu) {
|
||||||
ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type);
|
ns = ns_tree_lookup_rcu(fid->ns_id, fid->ns_type);
|
||||||
if (!ns)
|
if (!ns)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id);
|
VFS_WARN_ON_ONCE(ns->ns_id != fid->ns_id);
|
||||||
VFS_WARN_ON_ONCE(ns->ns_type != fid->ns_type);
|
|
||||||
|
|
||||||
if (ns->inum != fid->ns_inum)
|
if (fid->ns_inum && (fid->ns_inum != ns->inum))
|
||||||
|
return NULL;
|
||||||
|
if (fid->ns_type && (fid->ns_type != ns->ns_type))
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
if (!__ns_ref_get(ns))
|
/*
|
||||||
|
* This is racy because we're not actually taking an
|
||||||
|
* active reference. IOW, it could happen that the
|
||||||
|
* namespace becomes inactive after this check.
|
||||||
|
* We don't care because nsfs_init_inode() will just
|
||||||
|
* resurrect the relevant namespace tree for us. If it
|
||||||
|
* has been active here we just allow its resurrection.
|
||||||
|
* We could try to take an active reference here and
|
||||||
|
* then drop it again. But really, why bother.
|
||||||
|
*/
|
||||||
|
if (!ns_get_unless_inactive(ns))
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -590,6 +659,8 @@ static int nsfs_init_fs_context(struct fs_context *fc)
|
||||||
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
|
struct pseudo_fs_context *ctx = init_pseudo(fc, NSFS_MAGIC);
|
||||||
if (!ctx)
|
if (!ctx)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
fc->s_iflags |= SB_I_NOEXEC | SB_I_NODEV;
|
||||||
|
ctx->s_d_flags |= DCACHE_DONTCACHE;
|
||||||
ctx->ops = &nsfs_ops;
|
ctx->ops = &nsfs_ops;
|
||||||
ctx->eops = &nsfs_export_operations;
|
ctx->eops = &nsfs_export_operations;
|
||||||
ctx->dops = &ns_dentry_operations;
|
ctx->dops = &ns_dentry_operations;
|
||||||
|
|
@ -612,3 +683,27 @@ void __init nsfs_init(void)
|
||||||
nsfs_root_path.mnt = nsfs_mnt;
|
nsfs_root_path.mnt = nsfs_mnt;
|
||||||
nsfs_root_path.dentry = nsfs_mnt->mnt_root;
|
nsfs_root_path.dentry = nsfs_mnt->mnt_root;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void nsproxy_ns_active_get(struct nsproxy *ns)
|
||||||
|
{
|
||||||
|
ns_ref_active_get(ns->mnt_ns);
|
||||||
|
ns_ref_active_get(ns->uts_ns);
|
||||||
|
ns_ref_active_get(ns->ipc_ns);
|
||||||
|
ns_ref_active_get(ns->pid_ns_for_children);
|
||||||
|
ns_ref_active_get(ns->cgroup_ns);
|
||||||
|
ns_ref_active_get(ns->net_ns);
|
||||||
|
ns_ref_active_get(ns->time_ns);
|
||||||
|
ns_ref_active_get(ns->time_ns_for_children);
|
||||||
|
}
|
||||||
|
|
||||||
|
void nsproxy_ns_active_put(struct nsproxy *ns)
|
||||||
|
{
|
||||||
|
ns_ref_active_put(ns->mnt_ns);
|
||||||
|
ns_ref_active_put(ns->uts_ns);
|
||||||
|
ns_ref_active_put(ns->ipc_ns);
|
||||||
|
ns_ref_active_put(ns->pid_ns_for_children);
|
||||||
|
ns_ref_active_put(ns->cgroup_ns);
|
||||||
|
ns_ref_active_put(ns->net_ns);
|
||||||
|
ns_ref_active_put(ns->time_ns);
|
||||||
|
ns_ref_active_put(ns->time_ns_for_children);
|
||||||
|
}
|
||||||
|
|
|
||||||
76
fs/pidfs.c
76
fs/pidfs.c
|
|
@ -454,7 +454,6 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||||
struct task_struct *task __free(put_task) = NULL;
|
struct task_struct *task __free(put_task) = NULL;
|
||||||
struct nsproxy *nsp __free(put_nsproxy) = NULL;
|
struct nsproxy *nsp __free(put_nsproxy) = NULL;
|
||||||
struct ns_common *ns_common = NULL;
|
struct ns_common *ns_common = NULL;
|
||||||
struct pid_namespace *pid_ns;
|
|
||||||
|
|
||||||
if (!pidfs_ioctl_valid(cmd))
|
if (!pidfs_ioctl_valid(cmd))
|
||||||
return -ENOIOCTLCMD;
|
return -ENOIOCTLCMD;
|
||||||
|
|
@ -496,66 +495,64 @@ static long pidfd_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
|
||||||
switch (cmd) {
|
switch (cmd) {
|
||||||
/* Namespaces that hang off nsproxy. */
|
/* Namespaces that hang off nsproxy. */
|
||||||
case PIDFD_GET_CGROUP_NAMESPACE:
|
case PIDFD_GET_CGROUP_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_CGROUPS)) {
|
if (!ns_ref_get(nsp->cgroup_ns))
|
||||||
get_cgroup_ns(nsp->cgroup_ns);
|
break;
|
||||||
ns_common = to_ns_common(nsp->cgroup_ns);
|
ns_common = to_ns_common(nsp->cgroup_ns);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_IPC_NAMESPACE:
|
case PIDFD_GET_IPC_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_IPC_NS)) {
|
if (!ns_ref_get(nsp->ipc_ns))
|
||||||
get_ipc_ns(nsp->ipc_ns);
|
break;
|
||||||
ns_common = to_ns_common(nsp->ipc_ns);
|
ns_common = to_ns_common(nsp->ipc_ns);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_MNT_NAMESPACE:
|
case PIDFD_GET_MNT_NAMESPACE:
|
||||||
get_mnt_ns(nsp->mnt_ns);
|
if (!ns_ref_get(nsp->mnt_ns))
|
||||||
|
break;
|
||||||
ns_common = to_ns_common(nsp->mnt_ns);
|
ns_common = to_ns_common(nsp->mnt_ns);
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_NET_NAMESPACE:
|
case PIDFD_GET_NET_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_NET_NS)) {
|
if (!ns_ref_get(nsp->net_ns))
|
||||||
ns_common = to_ns_common(nsp->net_ns);
|
break;
|
||||||
get_net_ns(ns_common);
|
ns_common = to_ns_common(nsp->net_ns);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
|
case PIDFD_GET_PID_FOR_CHILDREN_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_PID_NS)) {
|
if (!ns_ref_get(nsp->pid_ns_for_children))
|
||||||
get_pid_ns(nsp->pid_ns_for_children);
|
break;
|
||||||
ns_common = to_ns_common(nsp->pid_ns_for_children);
|
ns_common = to_ns_common(nsp->pid_ns_for_children);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_TIME_NAMESPACE:
|
case PIDFD_GET_TIME_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_TIME_NS)) {
|
if (!ns_ref_get(nsp->time_ns))
|
||||||
get_time_ns(nsp->time_ns);
|
break;
|
||||||
ns_common = to_ns_common(nsp->time_ns);
|
ns_common = to_ns_common(nsp->time_ns);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
|
case PIDFD_GET_TIME_FOR_CHILDREN_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_TIME_NS)) {
|
if (!ns_ref_get(nsp->time_ns_for_children))
|
||||||
get_time_ns(nsp->time_ns_for_children);
|
break;
|
||||||
ns_common = to_ns_common(nsp->time_ns_for_children);
|
ns_common = to_ns_common(nsp->time_ns_for_children);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_UTS_NAMESPACE:
|
case PIDFD_GET_UTS_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_UTS_NS)) {
|
if (!ns_ref_get(nsp->uts_ns))
|
||||||
get_uts_ns(nsp->uts_ns);
|
break;
|
||||||
ns_common = to_ns_common(nsp->uts_ns);
|
ns_common = to_ns_common(nsp->uts_ns);
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
/* Namespaces that don't hang off nsproxy. */
|
/* Namespaces that don't hang off nsproxy. */
|
||||||
case PIDFD_GET_USER_NAMESPACE:
|
case PIDFD_GET_USER_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_USER_NS)) {
|
scoped_guard(rcu) {
|
||||||
rcu_read_lock();
|
struct user_namespace *user_ns;
|
||||||
ns_common = to_ns_common(get_user_ns(task_cred_xxx(task, user_ns)));
|
|
||||||
rcu_read_unlock();
|
user_ns = task_cred_xxx(task, user_ns);
|
||||||
|
if (!ns_ref_get(user_ns))
|
||||||
|
break;
|
||||||
|
ns_common = to_ns_common(user_ns);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
case PIDFD_GET_PID_NAMESPACE:
|
case PIDFD_GET_PID_NAMESPACE:
|
||||||
if (IS_ENABLED(CONFIG_PID_NS)) {
|
scoped_guard(rcu) {
|
||||||
rcu_read_lock();
|
struct pid_namespace *pid_ns;
|
||||||
|
|
||||||
pid_ns = task_active_pid_ns(task);
|
pid_ns = task_active_pid_ns(task);
|
||||||
if (pid_ns)
|
if (!ns_ref_get(pid_ns))
|
||||||
ns_common = to_ns_common(get_pid_ns(pid_ns));
|
break;
|
||||||
rcu_read_unlock();
|
ns_common = to_ns_common(pid_ns);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
|
|
@ -1022,6 +1019,7 @@ static int pidfs_init_fs_context(struct fs_context *fc)
|
||||||
|
|
||||||
fc->s_iflags |= SB_I_NOEXEC;
|
fc->s_iflags |= SB_I_NOEXEC;
|
||||||
fc->s_iflags |= SB_I_NODEV;
|
fc->s_iflags |= SB_I_NODEV;
|
||||||
|
ctx->s_d_flags |= DCACHE_DONTCACHE;
|
||||||
ctx->ops = &pidfs_sops;
|
ctx->ops = &pidfs_sops;
|
||||||
ctx->eops = &pidfs_export_operations;
|
ctx->eops = &pidfs_export_operations;
|
||||||
ctx->dops = &pidfs_dentry_operations;
|
ctx->dops = &pidfs_dentry_operations;
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,196 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
#ifndef _LINUX_NS_COMMON_TYPES_H
|
||||||
|
#define _LINUX_NS_COMMON_TYPES_H
|
||||||
|
|
||||||
|
#include <linux/atomic.h>
|
||||||
|
#include <linux/ns/nstree_types.h>
|
||||||
|
#include <linux/rbtree.h>
|
||||||
|
#include <linux/refcount.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
|
||||||
|
struct cgroup_namespace;
|
||||||
|
struct dentry;
|
||||||
|
struct ipc_namespace;
|
||||||
|
struct mnt_namespace;
|
||||||
|
struct net;
|
||||||
|
struct pid_namespace;
|
||||||
|
struct proc_ns_operations;
|
||||||
|
struct time_namespace;
|
||||||
|
struct user_namespace;
|
||||||
|
struct uts_namespace;
|
||||||
|
|
||||||
|
extern struct cgroup_namespace init_cgroup_ns;
|
||||||
|
extern struct ipc_namespace init_ipc_ns;
|
||||||
|
extern struct mnt_namespace init_mnt_ns;
|
||||||
|
extern struct net init_net;
|
||||||
|
extern struct pid_namespace init_pid_ns;
|
||||||
|
extern struct time_namespace init_time_ns;
|
||||||
|
extern struct user_namespace init_user_ns;
|
||||||
|
extern struct uts_namespace init_uts_ns;
|
||||||
|
|
||||||
|
extern const struct proc_ns_operations cgroupns_operations;
|
||||||
|
extern const struct proc_ns_operations ipcns_operations;
|
||||||
|
extern const struct proc_ns_operations mntns_operations;
|
||||||
|
extern const struct proc_ns_operations netns_operations;
|
||||||
|
extern const struct proc_ns_operations pidns_operations;
|
||||||
|
extern const struct proc_ns_operations pidns_for_children_operations;
|
||||||
|
extern const struct proc_ns_operations timens_operations;
|
||||||
|
extern const struct proc_ns_operations timens_for_children_operations;
|
||||||
|
extern const struct proc_ns_operations userns_operations;
|
||||||
|
extern const struct proc_ns_operations utsns_operations;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Namespace lifetimes are managed via a two-tier reference counting model:
|
||||||
|
*
|
||||||
|
* (1) __ns_ref (refcount_t): Main reference count tracking memory
|
||||||
|
* lifetime. Controls when the namespace structure itself is freed.
|
||||||
|
* It also pins the namespace on the namespace trees whereas (2)
|
||||||
|
* only regulates their visibility to userspace.
|
||||||
|
*
|
||||||
|
* (2) __ns_ref_active (atomic_t): Reference count tracking active users.
|
||||||
|
* Controls visibility of the namespace in the namespace trees.
|
||||||
|
* Any live task that uses the namespace (via nsproxy or cred) holds
|
||||||
|
* an active reference. Any open file descriptor or bind-mount of
|
||||||
|
* the namespace holds an active reference. Once all tasks have
|
||||||
|
* exited their namespaces and all file descriptors and
|
||||||
|
* bind-mounts have been released the active reference count drops
|
||||||
|
* to zero and the namespace becomes inactive. IOW, the namespace
|
||||||
|
* cannot be listed or opened via file handles anymore.
|
||||||
|
*
|
||||||
|
* Note that it is valid to transition from active to inactive and
|
||||||
|
* back from inactive to active e.g., when resurrecting an inactive
|
||||||
|
* namespace tree via the SIOCGSKNS ioctl().
|
||||||
|
*
|
||||||
|
* Relationship and lifecycle states:
|
||||||
|
*
|
||||||
|
* - Active (__ns_ref_active > 0):
|
||||||
|
* Namespace is actively used and visible to userspace. The namespace
|
||||||
|
* can be reopened via /proc/<pid>/ns/<ns_type>, via namespace file
|
||||||
|
* handles, or discovered via listns().
|
||||||
|
*
|
||||||
|
* - Inactive (__ns_ref_active == 0, __ns_ref > 0):
|
||||||
|
* No tasks are actively using the namespace and it isn't pinned by
|
||||||
|
* any bind-mounts or open file descriptors anymore. But the namespace
|
||||||
|
* is still kept alive by internal references. For example, the user
|
||||||
|
* namespace could be pinned by an open file through file->f_cred
|
||||||
|
* references when one of the now defunct tasks had opened a file and
|
||||||
|
* handed the file descriptor off to another process via a UNIX
|
||||||
|
* socket. Such references keep the namespace structure alive through
|
||||||
|
* __ns_ref but will not hold an active reference.
|
||||||
|
*
|
||||||
|
* - Destroyed (__ns_ref == 0):
|
||||||
|
* No references remain. The namespace is removed from the tree and freed.
|
||||||
|
*
|
||||||
|
* State transitions:
|
||||||
|
*
|
||||||
|
* Active -> Inactive:
|
||||||
|
* When the last task using the namespace exits it drops its active
|
||||||
|
* references to all namespaces. However, user and pid namespaces
|
||||||
|
* remain accessible until the task has been reaped.
|
||||||
|
*
|
||||||
|
* Inactive -> Active:
|
||||||
|
* An inactive namespace tree might be resurrected due to e.g., the
|
||||||
|
* SIOCGSKNS ioctl() on a socket.
|
||||||
|
*
|
||||||
|
* Inactive -> Destroyed:
|
||||||
|
* When __ns_ref drops to zero the namespace is removed from the
|
||||||
|
* namespace trees and the memory is freed (after RCU grace period).
|
||||||
|
*
|
||||||
|
* Initial namespaces:
|
||||||
|
* Boot-time namespaces (init_net, init_pid_ns, etc.) start with
|
||||||
|
* __ns_ref_active = 1 and remain active forever.
|
||||||
|
*
|
||||||
|
* @ns_type: type of namespace (e.g., CLONE_NEWNET)
|
||||||
|
* @stashed: cached dentry to be used by the vfs
|
||||||
|
* @ops: namespace operations
|
||||||
|
* @inum: namespace inode number (quickly recycled for non-initial namespaces)
|
||||||
|
* @__ns_ref: main reference count (do not use directly)
|
||||||
|
* @ns_tree: namespace tree nodes and active reference count
|
||||||
|
*/
|
||||||
|
struct ns_common {
|
||||||
|
u32 ns_type;
|
||||||
|
struct dentry *stashed;
|
||||||
|
const struct proc_ns_operations *ops;
|
||||||
|
unsigned int inum;
|
||||||
|
refcount_t __ns_ref; /* do not use directly */
|
||||||
|
union {
|
||||||
|
struct ns_tree;
|
||||||
|
struct rcu_head ns_rcu;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
#define to_ns_common(__ns) \
|
||||||
|
_Generic((__ns), \
|
||||||
|
struct cgroup_namespace *: &(__ns)->ns, \
|
||||||
|
const struct cgroup_namespace *: &(__ns)->ns, \
|
||||||
|
struct ipc_namespace *: &(__ns)->ns, \
|
||||||
|
const struct ipc_namespace *: &(__ns)->ns, \
|
||||||
|
struct mnt_namespace *: &(__ns)->ns, \
|
||||||
|
const struct mnt_namespace *: &(__ns)->ns, \
|
||||||
|
struct net *: &(__ns)->ns, \
|
||||||
|
const struct net *: &(__ns)->ns, \
|
||||||
|
struct pid_namespace *: &(__ns)->ns, \
|
||||||
|
const struct pid_namespace *: &(__ns)->ns, \
|
||||||
|
struct time_namespace *: &(__ns)->ns, \
|
||||||
|
const struct time_namespace *: &(__ns)->ns, \
|
||||||
|
struct user_namespace *: &(__ns)->ns, \
|
||||||
|
const struct user_namespace *: &(__ns)->ns, \
|
||||||
|
struct uts_namespace *: &(__ns)->ns, \
|
||||||
|
const struct uts_namespace *: &(__ns)->ns)
|
||||||
|
|
||||||
|
#define ns_init_inum(__ns) \
|
||||||
|
_Generic((__ns), \
|
||||||
|
struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
|
||||||
|
struct ipc_namespace *: IPC_NS_INIT_INO, \
|
||||||
|
struct mnt_namespace *: MNT_NS_INIT_INO, \
|
||||||
|
struct net *: NET_NS_INIT_INO, \
|
||||||
|
struct pid_namespace *: PID_NS_INIT_INO, \
|
||||||
|
struct time_namespace *: TIME_NS_INIT_INO, \
|
||||||
|
struct user_namespace *: USER_NS_INIT_INO, \
|
||||||
|
struct uts_namespace *: UTS_NS_INIT_INO)
|
||||||
|
|
||||||
|
#define ns_init_ns(__ns) \
|
||||||
|
_Generic((__ns), \
|
||||||
|
struct cgroup_namespace *: &init_cgroup_ns, \
|
||||||
|
struct ipc_namespace *: &init_ipc_ns, \
|
||||||
|
struct mnt_namespace *: &init_mnt_ns, \
|
||||||
|
struct net *: &init_net, \
|
||||||
|
struct pid_namespace *: &init_pid_ns, \
|
||||||
|
struct time_namespace *: &init_time_ns, \
|
||||||
|
struct user_namespace *: &init_user_ns, \
|
||||||
|
struct uts_namespace *: &init_uts_ns)
|
||||||
|
|
||||||
|
#define ns_init_id(__ns) \
|
||||||
|
_Generic((__ns), \
|
||||||
|
struct cgroup_namespace *: CGROUP_NS_INIT_ID, \
|
||||||
|
struct ipc_namespace *: IPC_NS_INIT_ID, \
|
||||||
|
struct mnt_namespace *: MNT_NS_INIT_ID, \
|
||||||
|
struct net *: NET_NS_INIT_ID, \
|
||||||
|
struct pid_namespace *: PID_NS_INIT_ID, \
|
||||||
|
struct time_namespace *: TIME_NS_INIT_ID, \
|
||||||
|
struct user_namespace *: USER_NS_INIT_ID, \
|
||||||
|
struct uts_namespace *: UTS_NS_INIT_ID)
|
||||||
|
|
||||||
|
#define to_ns_operations(__ns) \
|
||||||
|
_Generic((__ns), \
|
||||||
|
struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
|
||||||
|
struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
|
||||||
|
struct mnt_namespace *: &mntns_operations, \
|
||||||
|
struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
|
||||||
|
struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
|
||||||
|
struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
|
||||||
|
struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
|
||||||
|
struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
|
||||||
|
|
||||||
|
#define ns_common_type(__ns) \
|
||||||
|
_Generic((__ns), \
|
||||||
|
struct cgroup_namespace *: CLONE_NEWCGROUP, \
|
||||||
|
struct ipc_namespace *: CLONE_NEWIPC, \
|
||||||
|
struct mnt_namespace *: CLONE_NEWNS, \
|
||||||
|
struct net *: CLONE_NEWNET, \
|
||||||
|
struct pid_namespace *: CLONE_NEWPID, \
|
||||||
|
struct time_namespace *: CLONE_NEWTIME, \
|
||||||
|
struct user_namespace *: CLONE_NEWUSER, \
|
||||||
|
struct uts_namespace *: CLONE_NEWUTS)
|
||||||
|
|
||||||
|
#endif /* _LINUX_NS_COMMON_TYPES_H */
|
||||||
|
|
@ -0,0 +1,55 @@
|
||||||
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
|
||||||
|
#ifndef _LINUX_NSTREE_TYPES_H
|
||||||
|
#define _LINUX_NSTREE_TYPES_H
|
||||||
|
|
||||||
|
#include <linux/rbtree.h>
|
||||||
|
#include <linux/list.h>
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct ns_tree_root - Root of a namespace tree
|
||||||
|
* @ns_rb: Red-black tree root for efficient lookups
|
||||||
|
* @ns_list_head: List head for sequential iteration
|
||||||
|
*
|
||||||
|
* Each namespace tree maintains both an rbtree (for O(log n) lookups)
|
||||||
|
* and a list (for efficient sequential iteration). The list is kept in
|
||||||
|
* the same sorted order as the rbtree.
|
||||||
|
*/
|
||||||
|
struct ns_tree_root {
|
||||||
|
struct rb_root ns_rb;
|
||||||
|
struct list_head ns_list_head;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct ns_tree_node - Node in a namespace tree
|
||||||
|
* @ns_node: Red-black tree node
|
||||||
|
* @ns_list_entry: List entry for sequential iteration
|
||||||
|
*
|
||||||
|
* Represents a namespace's position in a tree. Each namespace has
|
||||||
|
* multiple tree nodes for different trees (unified, per-type, owner).
|
||||||
|
*/
|
||||||
|
struct ns_tree_node {
|
||||||
|
struct rb_node ns_node;
|
||||||
|
struct list_head ns_list_entry;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct ns_tree - Namespace tree nodes and active reference count
|
||||||
|
* @ns_id: Unique namespace identifier
|
||||||
|
* @__ns_ref_active: Active reference count (do not use directly)
|
||||||
|
* @ns_unified_node: Node in the global namespace tree
|
||||||
|
* @ns_tree_node: Node in the per-type namespace tree
|
||||||
|
* @ns_owner_node: Node in the owner namespace's tree of owned namespaces
|
||||||
|
* @ns_owner_root: Root of the tree of namespaces owned by this namespace
|
||||||
|
* (only used when this namespace is an owner)
|
||||||
|
*/
|
||||||
|
struct ns_tree {
|
||||||
|
u64 ns_id;
|
||||||
|
atomic_t __ns_ref_active;
|
||||||
|
struct ns_tree_node ns_unified_node;
|
||||||
|
struct ns_tree_node ns_tree_node;
|
||||||
|
struct ns_tree_node ns_owner_node;
|
||||||
|
struct ns_tree_root ns_owner_root;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif /* _LINUX_NSTREE_TYPES_H */
|
||||||
|
|
@ -2,122 +2,44 @@
|
||||||
#ifndef _LINUX_NS_COMMON_H
|
#ifndef _LINUX_NS_COMMON_H
|
||||||
#define _LINUX_NS_COMMON_H
|
#define _LINUX_NS_COMMON_H
|
||||||
|
|
||||||
|
#include <linux/ns/ns_common_types.h>
|
||||||
#include <linux/refcount.h>
|
#include <linux/refcount.h>
|
||||||
#include <linux/rbtree.h>
|
#include <linux/vfsdebug.h>
|
||||||
#include <uapi/linux/sched.h>
|
#include <uapi/linux/sched.h>
|
||||||
|
#include <uapi/linux/nsfs.h>
|
||||||
|
|
||||||
struct proc_ns_operations;
|
bool is_current_namespace(struct ns_common *ns);
|
||||||
|
|
||||||
struct cgroup_namespace;
|
|
||||||
struct ipc_namespace;
|
|
||||||
struct mnt_namespace;
|
|
||||||
struct net;
|
|
||||||
struct pid_namespace;
|
|
||||||
struct time_namespace;
|
|
||||||
struct user_namespace;
|
|
||||||
struct uts_namespace;
|
|
||||||
|
|
||||||
extern struct cgroup_namespace init_cgroup_ns;
|
|
||||||
extern struct ipc_namespace init_ipc_ns;
|
|
||||||
extern struct mnt_namespace init_mnt_ns;
|
|
||||||
extern struct net init_net;
|
|
||||||
extern struct pid_namespace init_pid_ns;
|
|
||||||
extern struct time_namespace init_time_ns;
|
|
||||||
extern struct user_namespace init_user_ns;
|
|
||||||
extern struct uts_namespace init_uts_ns;
|
|
||||||
|
|
||||||
extern const struct proc_ns_operations netns_operations;
|
|
||||||
extern const struct proc_ns_operations utsns_operations;
|
|
||||||
extern const struct proc_ns_operations ipcns_operations;
|
|
||||||
extern const struct proc_ns_operations pidns_operations;
|
|
||||||
extern const struct proc_ns_operations pidns_for_children_operations;
|
|
||||||
extern const struct proc_ns_operations userns_operations;
|
|
||||||
extern const struct proc_ns_operations mntns_operations;
|
|
||||||
extern const struct proc_ns_operations cgroupns_operations;
|
|
||||||
extern const struct proc_ns_operations timens_operations;
|
|
||||||
extern const struct proc_ns_operations timens_for_children_operations;
|
|
||||||
|
|
||||||
struct ns_common {
|
|
||||||
u32 ns_type;
|
|
||||||
struct dentry *stashed;
|
|
||||||
const struct proc_ns_operations *ops;
|
|
||||||
unsigned int inum;
|
|
||||||
refcount_t __ns_ref; /* do not use directly */
|
|
||||||
union {
|
|
||||||
struct {
|
|
||||||
u64 ns_id;
|
|
||||||
struct rb_node ns_tree_node;
|
|
||||||
struct list_head ns_list_node;
|
|
||||||
};
|
|
||||||
struct rcu_head ns_rcu;
|
|
||||||
};
|
|
||||||
};
|
|
||||||
|
|
||||||
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum);
|
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum);
|
||||||
void __ns_common_free(struct ns_common *ns);
|
void __ns_common_free(struct ns_common *ns);
|
||||||
|
struct ns_common *__must_check ns_owner(struct ns_common *ns);
|
||||||
|
|
||||||
#define to_ns_common(__ns) \
|
static __always_inline bool is_ns_init_inum(const struct ns_common *ns)
|
||||||
_Generic((__ns), \
|
{
|
||||||
struct cgroup_namespace *: &(__ns)->ns, \
|
VFS_WARN_ON_ONCE(ns->inum == 0);
|
||||||
const struct cgroup_namespace *: &(__ns)->ns, \
|
return unlikely(in_range(ns->inum, MNT_NS_INIT_INO,
|
||||||
struct ipc_namespace *: &(__ns)->ns, \
|
IPC_NS_INIT_INO - MNT_NS_INIT_INO + 1));
|
||||||
const struct ipc_namespace *: &(__ns)->ns, \
|
}
|
||||||
struct mnt_namespace *: &(__ns)->ns, \
|
|
||||||
const struct mnt_namespace *: &(__ns)->ns, \
|
|
||||||
struct net *: &(__ns)->ns, \
|
|
||||||
const struct net *: &(__ns)->ns, \
|
|
||||||
struct pid_namespace *: &(__ns)->ns, \
|
|
||||||
const struct pid_namespace *: &(__ns)->ns, \
|
|
||||||
struct time_namespace *: &(__ns)->ns, \
|
|
||||||
const struct time_namespace *: &(__ns)->ns, \
|
|
||||||
struct user_namespace *: &(__ns)->ns, \
|
|
||||||
const struct user_namespace *: &(__ns)->ns, \
|
|
||||||
struct uts_namespace *: &(__ns)->ns, \
|
|
||||||
const struct uts_namespace *: &(__ns)->ns)
|
|
||||||
|
|
||||||
#define ns_init_inum(__ns) \
|
static __always_inline bool is_ns_init_id(const struct ns_common *ns)
|
||||||
_Generic((__ns), \
|
{
|
||||||
struct cgroup_namespace *: CGROUP_NS_INIT_INO, \
|
VFS_WARN_ON_ONCE(ns->ns_id == 0);
|
||||||
struct ipc_namespace *: IPC_NS_INIT_INO, \
|
return ns->ns_id <= NS_LAST_INIT_ID;
|
||||||
struct mnt_namespace *: MNT_NS_INIT_INO, \
|
}
|
||||||
struct net *: NET_NS_INIT_INO, \
|
|
||||||
struct pid_namespace *: PID_NS_INIT_INO, \
|
|
||||||
struct time_namespace *: TIME_NS_INIT_INO, \
|
|
||||||
struct user_namespace *: USER_NS_INIT_INO, \
|
|
||||||
struct uts_namespace *: UTS_NS_INIT_INO)
|
|
||||||
|
|
||||||
#define ns_init_ns(__ns) \
|
#define NS_COMMON_INIT(nsname) \
|
||||||
_Generic((__ns), \
|
{ \
|
||||||
struct cgroup_namespace *: &init_cgroup_ns, \
|
.ns_type = ns_common_type(&nsname), \
|
||||||
struct ipc_namespace *: &init_ipc_ns, \
|
.ns_id = ns_init_id(&nsname), \
|
||||||
struct mnt_namespace *: &init_mnt_ns, \
|
.inum = ns_init_inum(&nsname), \
|
||||||
struct net *: &init_net, \
|
.ops = to_ns_operations(&nsname), \
|
||||||
struct pid_namespace *: &init_pid_ns, \
|
.stashed = NULL, \
|
||||||
struct time_namespace *: &init_time_ns, \
|
.__ns_ref = REFCOUNT_INIT(1), \
|
||||||
struct user_namespace *: &init_user_ns, \
|
.__ns_ref_active = ATOMIC_INIT(1), \
|
||||||
struct uts_namespace *: &init_uts_ns)
|
.ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \
|
||||||
|
.ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \
|
||||||
#define to_ns_operations(__ns) \
|
.ns_owner_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_node.ns_list_entry), \
|
||||||
_Generic((__ns), \
|
.ns_owner_root.ns_list_head = LIST_HEAD_INIT(nsname.ns.ns_owner_root.ns_list_head), \
|
||||||
struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \
|
}
|
||||||
struct ipc_namespace *: (IS_ENABLED(CONFIG_IPC_NS) ? &ipcns_operations : NULL), \
|
|
||||||
struct mnt_namespace *: &mntns_operations, \
|
|
||||||
struct net *: (IS_ENABLED(CONFIG_NET_NS) ? &netns_operations : NULL), \
|
|
||||||
struct pid_namespace *: (IS_ENABLED(CONFIG_PID_NS) ? &pidns_operations : NULL), \
|
|
||||||
struct time_namespace *: (IS_ENABLED(CONFIG_TIME_NS) ? &timens_operations : NULL), \
|
|
||||||
struct user_namespace *: (IS_ENABLED(CONFIG_USER_NS) ? &userns_operations : NULL), \
|
|
||||||
struct uts_namespace *: (IS_ENABLED(CONFIG_UTS_NS) ? &utsns_operations : NULL))
|
|
||||||
|
|
||||||
#define ns_common_type(__ns) \
|
|
||||||
_Generic((__ns), \
|
|
||||||
struct cgroup_namespace *: CLONE_NEWCGROUP, \
|
|
||||||
struct ipc_namespace *: CLONE_NEWIPC, \
|
|
||||||
struct mnt_namespace *: CLONE_NEWNS, \
|
|
||||||
struct net *: CLONE_NEWNET, \
|
|
||||||
struct pid_namespace *: CLONE_NEWPID, \
|
|
||||||
struct time_namespace *: CLONE_NEWTIME, \
|
|
||||||
struct user_namespace *: CLONE_NEWUSER, \
|
|
||||||
struct uts_namespace *: CLONE_NEWUTS)
|
|
||||||
|
|
||||||
#define ns_common_init(__ns) \
|
#define ns_common_init(__ns) \
|
||||||
__ns_common_init(to_ns_common(__ns), \
|
__ns_common_init(to_ns_common(__ns), \
|
||||||
|
|
@ -133,21 +55,96 @@ void __ns_common_free(struct ns_common *ns);
|
||||||
|
|
||||||
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
|
#define ns_common_free(__ns) __ns_common_free(to_ns_common((__ns)))
|
||||||
|
|
||||||
|
static __always_inline __must_check int __ns_ref_active_read(const struct ns_common *ns)
|
||||||
|
{
|
||||||
|
return atomic_read(&ns->__ns_ref_active);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline __must_check int __ns_ref_read(const struct ns_common *ns)
|
||||||
|
{
|
||||||
|
return refcount_read(&ns->__ns_ref);
|
||||||
|
}
|
||||||
|
|
||||||
static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
|
static __always_inline __must_check bool __ns_ref_put(struct ns_common *ns)
|
||||||
{
|
{
|
||||||
return refcount_dec_and_test(&ns->__ns_ref);
|
if (is_ns_init_id(ns)) {
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (refcount_dec_and_test(&ns->__ns_ref)) {
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns));
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
|
static __always_inline __must_check bool __ns_ref_get(struct ns_common *ns)
|
||||||
{
|
{
|
||||||
return refcount_inc_not_zero(&ns->__ns_ref);
|
if (is_ns_init_id(ns)) {
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (refcount_inc_not_zero(&ns->__ns_ref))
|
||||||
|
return true;
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns));
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define ns_ref_read(__ns) refcount_read(&to_ns_common((__ns))->__ns_ref)
|
static __always_inline void __ns_ref_inc(struct ns_common *ns)
|
||||||
#define ns_ref_inc(__ns) refcount_inc(&to_ns_common((__ns))->__ns_ref)
|
{
|
||||||
#define ns_ref_get(__ns) __ns_ref_get(to_ns_common((__ns)))
|
if (is_ns_init_id(ns)) {
|
||||||
#define ns_ref_put(__ns) __ns_ref_put(to_ns_common((__ns)))
|
VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
|
||||||
#define ns_ref_put_and_lock(__ns, __lock) \
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
|
||||||
refcount_dec_and_lock(&to_ns_common((__ns))->__ns_ref, (__lock))
|
return;
|
||||||
|
}
|
||||||
|
refcount_inc(&ns->__ns_ref);
|
||||||
|
}
|
||||||
|
|
||||||
|
static __always_inline __must_check bool __ns_ref_dec_and_lock(struct ns_common *ns,
|
||||||
|
spinlock_t *ns_lock)
|
||||||
|
{
|
||||||
|
if (is_ns_init_id(ns)) {
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_read(ns) != 1);
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) != 1);
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return refcount_dec_and_lock(&ns->__ns_ref, ns_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define ns_ref_read(__ns) __ns_ref_read(to_ns_common((__ns)))
|
||||||
|
#define ns_ref_inc(__ns) \
|
||||||
|
do { if (__ns) __ns_ref_inc(to_ns_common((__ns))); } while (0)
|
||||||
|
#define ns_ref_get(__ns) \
|
||||||
|
((__ns) ? __ns_ref_get(to_ns_common((__ns))) : false)
|
||||||
|
#define ns_ref_put(__ns) \
|
||||||
|
((__ns) ? __ns_ref_put(to_ns_common((__ns))) : false)
|
||||||
|
#define ns_ref_put_and_lock(__ns, __ns_lock) \
|
||||||
|
((__ns) ? __ns_ref_dec_and_lock(to_ns_common((__ns)), __ns_lock) : false)
|
||||||
|
|
||||||
|
#define ns_ref_active_read(__ns) \
|
||||||
|
((__ns) ? __ns_ref_active_read(to_ns_common(__ns)) : 0)
|
||||||
|
|
||||||
|
void __ns_ref_active_put(struct ns_common *ns);
|
||||||
|
|
||||||
|
#define ns_ref_active_put(__ns) \
|
||||||
|
do { if (__ns) __ns_ref_active_put(to_ns_common(__ns)); } while (0)
|
||||||
|
|
||||||
|
static __always_inline struct ns_common *__must_check ns_get_unless_inactive(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
if (!__ns_ref_active_read(ns)) {
|
||||||
|
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
if (!__ns_ref_get(ns))
|
||||||
|
return NULL;
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
void __ns_ref_active_get(struct ns_common *ns);
|
||||||
|
|
||||||
|
#define ns_ref_active_get(__ns) \
|
||||||
|
do { if (__ns) __ns_ref_active_get(to_ns_common(__ns)); } while (0)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
||||||
|
|
@ -37,4 +37,7 @@ void nsfs_init(void);
|
||||||
|
|
||||||
#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns)
|
#define current_in_namespace(__ns) (__current_namespace_from_type(__ns) == __ns)
|
||||||
|
|
||||||
|
void nsproxy_ns_active_get(struct nsproxy *ns);
|
||||||
|
void nsproxy_ns_active_put(struct nsproxy *ns);
|
||||||
|
|
||||||
#endif /* _LINUX_NSFS_H */
|
#endif /* _LINUX_NSFS_H */
|
||||||
|
|
|
||||||
|
|
@ -93,10 +93,13 @@ static inline struct cred *nsset_cred(struct nsset *set)
|
||||||
*/
|
*/
|
||||||
|
|
||||||
int copy_namespaces(u64 flags, struct task_struct *tsk);
|
int copy_namespaces(u64 flags, struct task_struct *tsk);
|
||||||
void exit_task_namespaces(struct task_struct *tsk);
|
void switch_cred_namespaces(const struct cred *old, const struct cred *new);
|
||||||
|
void exit_nsproxy_namespaces(struct task_struct *tsk);
|
||||||
|
void get_cred_namespaces(struct task_struct *tsk);
|
||||||
|
void exit_cred_namespaces(struct task_struct *tsk);
|
||||||
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
|
void switch_task_namespaces(struct task_struct *tsk, struct nsproxy *new);
|
||||||
int exec_task_namespaces(void);
|
int exec_task_namespaces(void);
|
||||||
void free_nsproxy(struct nsproxy *ns);
|
void deactivate_nsproxy(struct nsproxy *ns);
|
||||||
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
|
int unshare_nsproxy_namespaces(unsigned long, struct nsproxy **,
|
||||||
struct cred *, struct fs_struct *);
|
struct cred *, struct fs_struct *);
|
||||||
int __init nsproxy_cache_init(void);
|
int __init nsproxy_cache_init(void);
|
||||||
|
|
@ -104,7 +107,7 @@ int __init nsproxy_cache_init(void);
|
||||||
static inline void put_nsproxy(struct nsproxy *ns)
|
static inline void put_nsproxy(struct nsproxy *ns)
|
||||||
{
|
{
|
||||||
if (refcount_dec_and_test(&ns->count))
|
if (refcount_dec_and_test(&ns->count))
|
||||||
free_nsproxy(ns);
|
deactivate_nsproxy(ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void get_nsproxy(struct nsproxy *ns)
|
static inline void get_nsproxy(struct nsproxy *ns)
|
||||||
|
|
|
||||||
|
|
@ -1,22 +1,34 @@
|
||||||
/* SPDX-License-Identifier: GPL-2.0 */
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
|
/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
|
||||||
#ifndef _LINUX_NSTREE_H
|
#ifndef _LINUX_NSTREE_H
|
||||||
#define _LINUX_NSTREE_H
|
#define _LINUX_NSTREE_H
|
||||||
|
|
||||||
#include <linux/ns_common.h>
|
#include <linux/ns/nstree_types.h>
|
||||||
#include <linux/nsproxy.h>
|
#include <linux/nsproxy.h>
|
||||||
#include <linux/rbtree.h>
|
#include <linux/rbtree.h>
|
||||||
#include <linux/seqlock.h>
|
#include <linux/seqlock.h>
|
||||||
#include <linux/rculist.h>
|
#include <linux/rculist.h>
|
||||||
#include <linux/cookie.h>
|
#include <linux/cookie.h>
|
||||||
|
#include <uapi/linux/nsfs.h>
|
||||||
|
|
||||||
extern struct ns_tree cgroup_ns_tree;
|
struct ns_common;
|
||||||
extern struct ns_tree ipc_ns_tree;
|
|
||||||
extern struct ns_tree mnt_ns_tree;
|
extern struct ns_tree_root cgroup_ns_tree;
|
||||||
extern struct ns_tree net_ns_tree;
|
extern struct ns_tree_root ipc_ns_tree;
|
||||||
extern struct ns_tree pid_ns_tree;
|
extern struct ns_tree_root mnt_ns_tree;
|
||||||
extern struct ns_tree time_ns_tree;
|
extern struct ns_tree_root net_ns_tree;
|
||||||
extern struct ns_tree user_ns_tree;
|
extern struct ns_tree_root pid_ns_tree;
|
||||||
extern struct ns_tree uts_ns_tree;
|
extern struct ns_tree_root time_ns_tree;
|
||||||
|
extern struct ns_tree_root user_ns_tree;
|
||||||
|
extern struct ns_tree_root uts_ns_tree;
|
||||||
|
|
||||||
|
void ns_tree_node_init(struct ns_tree_node *node);
|
||||||
|
void ns_tree_root_init(struct ns_tree_root *root);
|
||||||
|
bool ns_tree_node_empty(const struct ns_tree_node *node);
|
||||||
|
struct rb_node *ns_tree_node_add(struct ns_tree_node *node,
|
||||||
|
struct ns_tree_root *root,
|
||||||
|
int (*cmp)(struct rb_node *, const struct rb_node *));
|
||||||
|
void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root);
|
||||||
|
|
||||||
#define to_ns_tree(__ns) \
|
#define to_ns_tree(__ns) \
|
||||||
_Generic((__ns), \
|
_Generic((__ns), \
|
||||||
|
|
@ -29,17 +41,21 @@ extern struct ns_tree uts_ns_tree;
|
||||||
struct user_namespace *: &(user_ns_tree), \
|
struct user_namespace *: &(user_ns_tree), \
|
||||||
struct uts_namespace *: &(uts_ns_tree))
|
struct uts_namespace *: &(uts_ns_tree))
|
||||||
|
|
||||||
u64 ns_tree_gen_id(struct ns_common *ns);
|
#define ns_tree_gen_id(__ns) \
|
||||||
void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree);
|
__ns_tree_gen_id(to_ns_common(__ns), \
|
||||||
void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree);
|
(((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0))
|
||||||
|
|
||||||
|
u64 __ns_tree_gen_id(struct ns_common *ns, u64 id);
|
||||||
|
void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree);
|
||||||
|
void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree);
|
||||||
struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type);
|
struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type);
|
||||||
struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
|
struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
|
||||||
struct ns_tree *ns_tree,
|
struct ns_tree_root *ns_tree,
|
||||||
bool previous);
|
bool previous);
|
||||||
|
|
||||||
static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
|
static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_tree, u64 id)
|
||||||
{
|
{
|
||||||
ns_tree_gen_id(ns);
|
__ns_tree_gen_id(ns, id);
|
||||||
__ns_tree_add_raw(ns, ns_tree);
|
__ns_tree_add_raw(ns, ns_tree);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -59,7 +75,9 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
|
||||||
* This function assigns a new id to the namespace and adds it to the
|
* This function assigns a new id to the namespace and adds it to the
|
||||||
* appropriate namespace tree and list.
|
* appropriate namespace tree and list.
|
||||||
*/
|
*/
|
||||||
#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns))
|
#define ns_tree_add(__ns) \
|
||||||
|
__ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns), \
|
||||||
|
(((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0))
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ns_tree_remove - Remove a namespace from a namespace tree
|
* ns_tree_remove - Remove a namespace from a namespace tree
|
||||||
|
|
@ -73,6 +91,6 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree)
|
||||||
#define ns_tree_adjoined_rcu(__ns, __previous) \
|
#define ns_tree_adjoined_rcu(__ns, __previous) \
|
||||||
__ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous)
|
__ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous)
|
||||||
|
|
||||||
#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node))
|
#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node.ns_node))
|
||||||
|
|
||||||
#endif /* _LINUX_NSTREE_H */
|
#endif /* _LINUX_NSTREE_H */
|
||||||
|
|
|
||||||
|
|
@ -61,8 +61,7 @@ static inline struct pid_namespace *to_pid_ns(struct ns_common *ns)
|
||||||
|
|
||||||
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
|
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
|
||||||
{
|
{
|
||||||
if (ns != &init_pid_ns)
|
ns_ref_inc(ns);
|
||||||
ns_ref_inc(ns);
|
|
||||||
return ns;
|
return ns;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,7 @@ struct pseudo_fs_context {
|
||||||
const struct xattr_handler * const *xattr;
|
const struct xattr_handler * const *xattr;
|
||||||
const struct dentry_operations *dops;
|
const struct dentry_operations *dops;
|
||||||
unsigned long magic;
|
unsigned long magic;
|
||||||
|
unsigned int s_d_flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
|
struct pseudo_fs_context *init_pseudo(struct fs_context *fc,
|
||||||
|
|
|
||||||
|
|
@ -77,6 +77,7 @@ struct cachestat_range;
|
||||||
struct cachestat;
|
struct cachestat;
|
||||||
struct statmount;
|
struct statmount;
|
||||||
struct mnt_id_req;
|
struct mnt_id_req;
|
||||||
|
struct ns_id_req;
|
||||||
struct xattr_args;
|
struct xattr_args;
|
||||||
struct file_attr;
|
struct file_attr;
|
||||||
|
|
||||||
|
|
@ -437,6 +438,9 @@ asmlinkage long sys_statmount(const struct mnt_id_req __user *req,
|
||||||
asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
|
asmlinkage long sys_listmount(const struct mnt_id_req __user *req,
|
||||||
u64 __user *mnt_ids, size_t nr_mnt_ids,
|
u64 __user *mnt_ids, size_t nr_mnt_ids,
|
||||||
unsigned int flags);
|
unsigned int flags);
|
||||||
|
asmlinkage long sys_listns(const struct ns_id_req __user *req,
|
||||||
|
u64 __user *ns_ids, size_t nr_ns_ids,
|
||||||
|
unsigned int flags);
|
||||||
asmlinkage long sys_truncate(const char __user *path, long length);
|
asmlinkage long sys_truncate(const char __user *path, long length);
|
||||||
asmlinkage long sys_ftruncate(unsigned int fd, off_t length);
|
asmlinkage long sys_ftruncate(unsigned int fd, off_t length);
|
||||||
#if BITS_PER_LONG == 32
|
#if BITS_PER_LONG == 32
|
||||||
|
|
|
||||||
|
|
@ -166,13 +166,13 @@ static inline void set_userns_rlimit_max(struct user_namespace *ns,
|
||||||
ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX;
|
ns->rlimit_max[type] = max <= LONG_MAX ? max : LONG_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_USER_NS
|
|
||||||
|
|
||||||
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
|
static inline struct user_namespace *to_user_ns(struct ns_common *ns)
|
||||||
{
|
{
|
||||||
return container_of(ns, struct user_namespace, ns);
|
return container_of(ns, struct user_namespace, ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_USER_NS
|
||||||
|
|
||||||
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
|
||||||
{
|
{
|
||||||
if (ns)
|
if (ns)
|
||||||
|
|
|
||||||
|
|
@ -857,9 +857,11 @@ __SYSCALL(__NR_open_tree_attr, sys_open_tree_attr)
|
||||||
__SYSCALL(__NR_file_getattr, sys_file_getattr)
|
__SYSCALL(__NR_file_getattr, sys_file_getattr)
|
||||||
#define __NR_file_setattr 469
|
#define __NR_file_setattr 469
|
||||||
__SYSCALL(__NR_file_setattr, sys_file_setattr)
|
__SYSCALL(__NR_file_setattr, sys_file_setattr)
|
||||||
|
#define __NR_listns 470
|
||||||
|
__SYSCALL(__NR_listns, sys_listns)
|
||||||
|
|
||||||
#undef __NR_syscalls
|
#undef __NR_syscalls
|
||||||
#define __NR_syscalls 470
|
#define __NR_syscalls 471
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* 32 bit systems traditionally used different
|
* 32 bit systems traditionally used different
|
||||||
|
|
|
||||||
|
|
@ -67,4 +67,62 @@ struct nsfs_file_handle {
|
||||||
#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
|
#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
|
||||||
#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
|
#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
|
||||||
|
|
||||||
|
enum init_ns_id {
|
||||||
|
IPC_NS_INIT_ID = 1ULL,
|
||||||
|
UTS_NS_INIT_ID = 2ULL,
|
||||||
|
USER_NS_INIT_ID = 3ULL,
|
||||||
|
PID_NS_INIT_ID = 4ULL,
|
||||||
|
CGROUP_NS_INIT_ID = 5ULL,
|
||||||
|
TIME_NS_INIT_ID = 6ULL,
|
||||||
|
NET_NS_INIT_ID = 7ULL,
|
||||||
|
MNT_NS_INIT_ID = 8ULL,
|
||||||
|
#ifdef __KERNEL__
|
||||||
|
NS_LAST_INIT_ID = MNT_NS_INIT_ID,
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
enum ns_type {
|
||||||
|
TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */
|
||||||
|
MNT_NS = (1ULL << 17), /* CLONE_NEWNS */
|
||||||
|
CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */
|
||||||
|
UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */
|
||||||
|
IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */
|
||||||
|
USER_NS = (1ULL << 28), /* CLONE_NEWUSER */
|
||||||
|
PID_NS = (1ULL << 29), /* CLONE_NEWPID */
|
||||||
|
NET_NS = (1ULL << 30), /* CLONE_NEWNET */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct ns_id_req - namespace ID request structure
|
||||||
|
* @size: size of this structure
|
||||||
|
* @spare: reserved for future use
|
||||||
|
* @ns_type: namespace type(s) to filter by (listns only)
|
||||||
|
* @ns_id: last namespace id
|
||||||
|
* @user_ns_id: owning user namespace ID
|
||||||
|
*
|
||||||
|
* Structure for passing namespace ID and miscellaneous parameters to
|
||||||
|
* statns(2) and listns(2).
|
||||||
|
*
|
||||||
|
* For statns(2) @param represents the request mask.
|
||||||
|
* For listns(2) @ns_id represents the last listed namespace id (or zero).
|
||||||
|
*/
|
||||||
|
struct ns_id_req {
|
||||||
|
__u32 size;
|
||||||
|
__u32 spare;
|
||||||
|
__u64 ns_id;
|
||||||
|
struct /* listns */ {
|
||||||
|
__u32 ns_type;
|
||||||
|
__u32 spare2;
|
||||||
|
__u64 user_ns_id;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Special @user_ns_id value that can be passed to listns()
|
||||||
|
*/
|
||||||
|
#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
|
||||||
|
|
||||||
|
/* List of all ns_id_req versions. */
|
||||||
|
#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
|
||||||
|
|
||||||
#endif /* __LINUX_NSFS_H */
|
#endif /* __LINUX_NSFS_H */
|
||||||
|
|
|
||||||
|
|
@ -8,8 +8,7 @@
|
||||||
#include <linux/utsname.h>
|
#include <linux/utsname.h>
|
||||||
|
|
||||||
struct uts_namespace init_uts_ns = {
|
struct uts_namespace init_uts_ns = {
|
||||||
.ns.ns_type = ns_common_type(&init_uts_ns),
|
.ns = NS_COMMON_INIT(init_uts_ns),
|
||||||
.ns.__ns_ref = REFCOUNT_INIT(2),
|
|
||||||
.name = {
|
.name = {
|
||||||
.sysname = UTS_SYSNAME,
|
.sysname = UTS_SYSNAME,
|
||||||
.nodename = UTS_NODENAME,
|
.nodename = UTS_NODENAME,
|
||||||
|
|
@ -19,10 +18,6 @@ struct uts_namespace init_uts_ns = {
|
||||||
.domainname = UTS_DOMAINNAME,
|
.domainname = UTS_DOMAINNAME,
|
||||||
},
|
},
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
.ns.inum = ns_init_inum(&init_uts_ns),
|
|
||||||
#ifdef CONFIG_UTS_NS
|
|
||||||
.ns.ops = &utsns_operations,
|
|
||||||
#endif
|
|
||||||
};
|
};
|
||||||
|
|
||||||
/* FIXED STRINGS! Don't touch! */
|
/* FIXED STRINGS! Don't touch! */
|
||||||
|
|
|
||||||
|
|
@ -27,13 +27,8 @@ DEFINE_SPINLOCK(mq_lock);
|
||||||
* and not CONFIG_IPC_NS.
|
* and not CONFIG_IPC_NS.
|
||||||
*/
|
*/
|
||||||
struct ipc_namespace init_ipc_ns = {
|
struct ipc_namespace init_ipc_ns = {
|
||||||
.ns.__ns_ref = REFCOUNT_INIT(1),
|
.ns = NS_COMMON_INIT(init_ipc_ns),
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
.ns.inum = ns_init_inum(&init_ipc_ns),
|
|
||||||
#ifdef CONFIG_IPC_NS
|
|
||||||
.ns.ops = &ipcns_operations,
|
|
||||||
#endif
|
|
||||||
.ns.ns_type = ns_common_type(&init_ipc_ns),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct msg_msgseg {
|
struct msg_msgseg {
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
|
||||||
if (err)
|
if (err)
|
||||||
goto fail_free;
|
goto fail_free;
|
||||||
|
|
||||||
|
ns_tree_gen_id(ns);
|
||||||
ns->user_ns = get_user_ns(user_ns);
|
ns->user_ns = get_user_ns(user_ns);
|
||||||
ns->ucounts = ucounts;
|
ns->ucounts = ucounts;
|
||||||
|
|
||||||
|
|
@ -86,7 +87,7 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
|
||||||
|
|
||||||
sem_init_ns(ns);
|
sem_init_ns(ns);
|
||||||
shm_init_ns(ns);
|
shm_init_ns(ns);
|
||||||
ns_tree_add(ns);
|
ns_tree_add_raw(ns);
|
||||||
|
|
||||||
return ns;
|
return ns;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -250,12 +250,9 @@ bool cgroup_enable_per_threadgroup_rwsem __read_mostly;
|
||||||
|
|
||||||
/* cgroup namespace for init task */
|
/* cgroup namespace for init task */
|
||||||
struct cgroup_namespace init_cgroup_ns = {
|
struct cgroup_namespace init_cgroup_ns = {
|
||||||
.ns.__ns_ref = REFCOUNT_INIT(2),
|
.ns = NS_COMMON_INIT(init_cgroup_ns),
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
.ns.ops = &cgroupns_operations,
|
|
||||||
.ns.inum = ns_init_inum(&init_cgroup_ns),
|
|
||||||
.root_cset = &init_css_set,
|
.root_cset = &init_css_set,
|
||||||
.ns.ns_type = ns_common_type(&init_cgroup_ns),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct file_system_type cgroup2_fs_type;
|
static struct file_system_type cgroup2_fs_type;
|
||||||
|
|
@ -1522,9 +1519,9 @@ static struct cgroup *current_cgns_cgroup_dfl(void)
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* NOTE: This function may be called from bpf_cgroup_from_id()
|
* NOTE: This function may be called from bpf_cgroup_from_id()
|
||||||
* on a task which has already passed exit_task_namespaces() and
|
* on a task which has already passed exit_nsproxy_namespaces()
|
||||||
* nsproxy == NULL. Fall back to cgrp_dfl_root which will make all
|
* and nsproxy == NULL. Fall back to cgrp_dfl_root which will
|
||||||
* cgroups visible for lookups.
|
* make all cgroups visible for lookups.
|
||||||
*/
|
*/
|
||||||
return &cgrp_dfl_root.cgrp;
|
return &cgrp_dfl_root.cgrp;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -30,7 +30,6 @@ static struct cgroup_namespace *alloc_cgroup_ns(void)
|
||||||
ret = ns_common_init(new_ns);
|
ret = ns_common_init(new_ns);
|
||||||
if (ret)
|
if (ret)
|
||||||
return ERR_PTR(ret);
|
return ERR_PTR(ret);
|
||||||
ns_tree_add(new_ns);
|
|
||||||
return no_free_ptr(new_ns);
|
return no_free_ptr(new_ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -86,6 +85,7 @@ struct cgroup_namespace *copy_cgroup_ns(u64 flags,
|
||||||
new_ns->ucounts = ucounts;
|
new_ns->ucounts = ucounts;
|
||||||
new_ns->root_cset = cset;
|
new_ns->root_cset = cset;
|
||||||
|
|
||||||
|
ns_tree_add(new_ns);
|
||||||
return new_ns;
|
return new_ns;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -306,6 +306,7 @@ int copy_creds(struct task_struct *p, u64 clone_flags)
|
||||||
kdebug("share_creds(%p{%ld})",
|
kdebug("share_creds(%p{%ld})",
|
||||||
p->cred, atomic_long_read(&p->cred->usage));
|
p->cred, atomic_long_read(&p->cred->usage));
|
||||||
inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
||||||
|
get_cred_namespaces(p);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -343,6 +344,8 @@ int copy_creds(struct task_struct *p, u64 clone_flags)
|
||||||
|
|
||||||
p->cred = p->real_cred = get_cred(new);
|
p->cred = p->real_cred = get_cred(new);
|
||||||
inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
inc_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
||||||
|
get_cred_namespaces(p);
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
error_put:
|
error_put:
|
||||||
|
|
@ -435,10 +438,13 @@ int commit_creds(struct cred *new)
|
||||||
*/
|
*/
|
||||||
if (new->user != old->user || new->user_ns != old->user_ns)
|
if (new->user != old->user || new->user_ns != old->user_ns)
|
||||||
inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
|
inc_rlimit_ucounts(new->ucounts, UCOUNT_RLIMIT_NPROC, 1);
|
||||||
|
|
||||||
rcu_assign_pointer(task->real_cred, new);
|
rcu_assign_pointer(task->real_cred, new);
|
||||||
rcu_assign_pointer(task->cred, new);
|
rcu_assign_pointer(task->cred, new);
|
||||||
if (new->user != old->user || new->user_ns != old->user_ns)
|
if (new->user != old->user || new->user_ns != old->user_ns)
|
||||||
dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
|
dec_rlimit_ucounts(old->ucounts, UCOUNT_RLIMIT_NPROC, 1);
|
||||||
|
if (new->user_ns != old->user_ns)
|
||||||
|
switch_cred_namespaces(old, new);
|
||||||
|
|
||||||
/* send notifications */
|
/* send notifications */
|
||||||
if (!uid_eq(new->uid, old->uid) ||
|
if (!uid_eq(new->uid, old->uid) ||
|
||||||
|
|
|
||||||
|
|
@ -291,6 +291,7 @@ void release_task(struct task_struct *p)
|
||||||
write_unlock_irq(&tasklist_lock);
|
write_unlock_irq(&tasklist_lock);
|
||||||
/* @thread_pid can't go away until free_pids() below */
|
/* @thread_pid can't go away until free_pids() below */
|
||||||
proc_flush_pid(thread_pid);
|
proc_flush_pid(thread_pid);
|
||||||
|
exit_cred_namespaces(p);
|
||||||
add_device_randomness(&p->se.sum_exec_runtime,
|
add_device_randomness(&p->se.sum_exec_runtime,
|
||||||
sizeof(p->se.sum_exec_runtime));
|
sizeof(p->se.sum_exec_runtime));
|
||||||
free_pids(post.pids);
|
free_pids(post.pids);
|
||||||
|
|
@ -962,7 +963,7 @@ void __noreturn do_exit(long code)
|
||||||
exit_fs(tsk);
|
exit_fs(tsk);
|
||||||
if (group_dead)
|
if (group_dead)
|
||||||
disassociate_ctty(1);
|
disassociate_ctty(1);
|
||||||
exit_task_namespaces(tsk);
|
exit_nsproxy_namespaces(tsk);
|
||||||
exit_task_work(tsk);
|
exit_task_work(tsk);
|
||||||
exit_thread(tsk);
|
exit_thread(tsk);
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -2453,7 +2453,7 @@ __latent_entropy struct task_struct *copy_process(
|
||||||
if (p->io_context)
|
if (p->io_context)
|
||||||
exit_io_context(p);
|
exit_io_context(p);
|
||||||
bad_fork_cleanup_namespaces:
|
bad_fork_cleanup_namespaces:
|
||||||
exit_task_namespaces(p);
|
exit_nsproxy_namespaces(p);
|
||||||
bad_fork_cleanup_mm:
|
bad_fork_cleanup_mm:
|
||||||
if (p->mm) {
|
if (p->mm) {
|
||||||
mm_clear_owner(p->mm, p);
|
mm_clear_owner(p->mm, p);
|
||||||
|
|
@ -2487,6 +2487,7 @@ __latent_entropy struct task_struct *copy_process(
|
||||||
delayacct_tsk_free(p);
|
delayacct_tsk_free(p);
|
||||||
bad_fork_cleanup_count:
|
bad_fork_cleanup_count:
|
||||||
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
dec_rlimit_ucounts(task_ucounts(p), UCOUNT_RLIMIT_NPROC, 1);
|
||||||
|
exit_cred_namespaces(p);
|
||||||
exit_creds(p);
|
exit_creds(p);
|
||||||
bad_fork_free:
|
bad_fork_free:
|
||||||
WRITE_ONCE(p->__state, TASK_DEAD);
|
WRITE_ONCE(p->__state, TASK_DEAD);
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,10 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-only
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
|
||||||
|
|
||||||
#include <linux/ns_common.h>
|
#include <linux/ns_common.h>
|
||||||
|
#include <linux/nstree.h>
|
||||||
#include <linux/proc_ns.h>
|
#include <linux/proc_ns.h>
|
||||||
|
#include <linux/user_namespace.h>
|
||||||
#include <linux/vfsdebug.h>
|
#include <linux/vfsdebug.h>
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_VFS
|
#ifdef CONFIG_DEBUG_VFS
|
||||||
|
|
@ -52,26 +55,257 @@ static void ns_debug(struct ns_common *ns, const struct proc_ns_operations *ops)
|
||||||
|
|
||||||
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum)
|
int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_operations *ops, int inum)
|
||||||
{
|
{
|
||||||
|
int ret = 0;
|
||||||
|
|
||||||
refcount_set(&ns->__ns_ref, 1);
|
refcount_set(&ns->__ns_ref, 1);
|
||||||
ns->stashed = NULL;
|
ns->stashed = NULL;
|
||||||
ns->ops = ops;
|
ns->ops = ops;
|
||||||
ns->ns_id = 0;
|
ns->ns_id = 0;
|
||||||
ns->ns_type = ns_type;
|
ns->ns_type = ns_type;
|
||||||
RB_CLEAR_NODE(&ns->ns_tree_node);
|
ns_tree_node_init(&ns->ns_tree_node);
|
||||||
INIT_LIST_HEAD(&ns->ns_list_node);
|
ns_tree_node_init(&ns->ns_unified_node);
|
||||||
|
ns_tree_node_init(&ns->ns_owner_node);
|
||||||
|
ns_tree_root_init(&ns->ns_owner_root);
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_VFS
|
#ifdef CONFIG_DEBUG_VFS
|
||||||
ns_debug(ns, ops);
|
ns_debug(ns, ops);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (inum) {
|
if (inum)
|
||||||
ns->inum = inum;
|
ns->inum = inum;
|
||||||
return 0;
|
else
|
||||||
}
|
ret = proc_alloc_inum(&ns->inum);
|
||||||
return proc_alloc_inum(&ns->inum);
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
/*
|
||||||
|
* Tree ref starts at 0. It's incremented when namespace enters
|
||||||
|
* active use (installed in nsproxy) and decremented when all
|
||||||
|
* active uses are gone. Initial namespaces are always active.
|
||||||
|
*/
|
||||||
|
if (is_ns_init_inum(ns))
|
||||||
|
atomic_set(&ns->__ns_ref_active, 1);
|
||||||
|
else
|
||||||
|
atomic_set(&ns->__ns_ref_active, 0);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __ns_common_free(struct ns_common *ns)
|
void __ns_common_free(struct ns_common *ns)
|
||||||
{
|
{
|
||||||
proc_free_inum(ns->inum);
|
proc_free_inum(ns->inum);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ns_common *__must_check ns_owner(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
struct user_namespace *owner;
|
||||||
|
|
||||||
|
if (unlikely(!ns->ops))
|
||||||
|
return NULL;
|
||||||
|
VFS_WARN_ON_ONCE(!ns->ops->owner);
|
||||||
|
owner = ns->ops->owner(ns);
|
||||||
|
VFS_WARN_ON_ONCE(!owner && ns != to_ns_common(&init_user_ns));
|
||||||
|
if (!owner)
|
||||||
|
return NULL;
|
||||||
|
/* Skip init_user_ns as it's always active */
|
||||||
|
if (owner == &init_user_ns)
|
||||||
|
return NULL;
|
||||||
|
return to_ns_common(owner);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The active reference count works by having each namespace that gets
|
||||||
|
* created take a single active reference on its owning user namespace.
|
||||||
|
* That single reference is only released once the child namespace's
|
||||||
|
* active count itself goes down.
|
||||||
|
*
|
||||||
|
* A regular namespace tree might look as follows:
|
||||||
|
* Legend:
|
||||||
|
* + : adding active reference
|
||||||
|
* - : dropping active reference
|
||||||
|
* x : always active (initial namespace)
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* net_ns pid_ns
|
||||||
|
* \ /
|
||||||
|
* + +
|
||||||
|
* user_ns1 (2)
|
||||||
|
* |
|
||||||
|
* ipc_ns | uts_ns
|
||||||
|
* \ | /
|
||||||
|
* + + +
|
||||||
|
* user_ns2 (3)
|
||||||
|
* |
|
||||||
|
* cgroup_ns | mnt_ns
|
||||||
|
* \ | /
|
||||||
|
* x x x
|
||||||
|
* init_user_ns (1)
|
||||||
|
*
|
||||||
|
* If both net_ns and pid_ns put their last active reference on
|
||||||
|
* themselves it will cascade to user_ns1 dropping its own active
|
||||||
|
* reference and dropping one active reference on user_ns2:
|
||||||
|
*
|
||||||
|
* net_ns pid_ns
|
||||||
|
* \ /
|
||||||
|
* - -
|
||||||
|
* user_ns1 (0)
|
||||||
|
* |
|
||||||
|
* ipc_ns | uts_ns
|
||||||
|
* \ | /
|
||||||
|
* + - +
|
||||||
|
* user_ns2 (2)
|
||||||
|
* |
|
||||||
|
* cgroup_ns | mnt_ns
|
||||||
|
* \ | /
|
||||||
|
* x x x
|
||||||
|
* init_user_ns (1)
|
||||||
|
*
|
||||||
|
* The iteration stops once we reach a namespace that still has active
|
||||||
|
* references.
|
||||||
|
*/
|
||||||
|
void __ns_ref_active_put(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
/* Initial namespaces are always active. */
|
||||||
|
if (is_ns_init_id(ns))
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!atomic_dec_and_test(&ns->__ns_ref_active)) {
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
|
||||||
|
VFS_WARN_ON_ONCE(!__ns_ref_read(ns));
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
ns = ns_owner(ns);
|
||||||
|
if (!ns)
|
||||||
|
return;
|
||||||
|
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
|
||||||
|
if (!atomic_dec_and_test(&ns->__ns_ref_active)) {
|
||||||
|
VFS_WARN_ON_ONCE(__ns_ref_active_read(ns) < 0);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The active reference count works by having each namespace that gets
|
||||||
|
* created take a single active reference on its owning user namespace.
|
||||||
|
* That single reference is only released once the child namespace's
|
||||||
|
* active count itself goes down. This makes it possible to efficiently
|
||||||
|
* resurrect a namespace tree:
|
||||||
|
*
|
||||||
|
* A regular namespace tree might look as follows:
|
||||||
|
* Legend:
|
||||||
|
* + : adding active reference
|
||||||
|
* - : dropping active reference
|
||||||
|
* x : always active (initial namespace)
|
||||||
|
*
|
||||||
|
*
|
||||||
|
* net_ns pid_ns
|
||||||
|
* \ /
|
||||||
|
* + +
|
||||||
|
* user_ns1 (2)
|
||||||
|
* |
|
||||||
|
* ipc_ns | uts_ns
|
||||||
|
* \ | /
|
||||||
|
* + + +
|
||||||
|
* user_ns2 (3)
|
||||||
|
* |
|
||||||
|
* cgroup_ns | mnt_ns
|
||||||
|
* \ | /
|
||||||
|
* x x x
|
||||||
|
* init_user_ns (1)
|
||||||
|
*
|
||||||
|
* If both net_ns and pid_ns put their last active reference on
|
||||||
|
* themselves it will cascade to user_ns1 dropping its own active
|
||||||
|
* reference and dropping one active reference on user_ns2:
|
||||||
|
*
|
||||||
|
* net_ns pid_ns
|
||||||
|
* \ /
|
||||||
|
* - -
|
||||||
|
* user_ns1 (0)
|
||||||
|
* |
|
||||||
|
* ipc_ns | uts_ns
|
||||||
|
* \ | /
|
||||||
|
* + - +
|
||||||
|
* user_ns2 (2)
|
||||||
|
* |
|
||||||
|
* cgroup_ns | mnt_ns
|
||||||
|
* \ | /
|
||||||
|
* x x x
|
||||||
|
* init_user_ns (1)
|
||||||
|
*
|
||||||
|
* Assume the whole tree is dead but all namespaces are still active:
|
||||||
|
*
|
||||||
|
* net_ns pid_ns
|
||||||
|
* \ /
|
||||||
|
* - -
|
||||||
|
* user_ns1 (0)
|
||||||
|
* |
|
||||||
|
* ipc_ns | uts_ns
|
||||||
|
* \ | /
|
||||||
|
* - - -
|
||||||
|
* user_ns2 (0)
|
||||||
|
* |
|
||||||
|
* cgroup_ns | mnt_ns
|
||||||
|
* \ | /
|
||||||
|
* x x x
|
||||||
|
* init_user_ns (1)
|
||||||
|
*
|
||||||
|
* Now assume the net_ns gets resurrected (e.g., via the SIOCGSKNS ioctl()):
|
||||||
|
*
|
||||||
|
* net_ns pid_ns
|
||||||
|
* \ /
|
||||||
|
* + -
|
||||||
|
* user_ns1 (0)
|
||||||
|
* |
|
||||||
|
* ipc_ns | uts_ns
|
||||||
|
* \ | /
|
||||||
|
* - + -
|
||||||
|
* user_ns2 (0)
|
||||||
|
* |
|
||||||
|
* cgroup_ns | mnt_ns
|
||||||
|
* \ | /
|
||||||
|
* x x x
|
||||||
|
* init_user_ns (1)
|
||||||
|
*
|
||||||
|
* If net_ns had a zero reference count and we bumped it we also need to
|
||||||
|
* take another reference on its owning user namespace. Similarly, if
|
||||||
|
* pid_ns had a zero reference count it also needs to take another
|
||||||
|
* reference on its owning user namespace. So both net_ns and pid_ns
|
||||||
|
* will each have their own reference on the owning user namespace.
|
||||||
|
*
|
||||||
|
* If the owning user namespace user_ns1 had a zero reference count then
|
||||||
|
* it also needs to take another reference on its owning user namespace
|
||||||
|
* and so on.
|
||||||
|
*/
|
||||||
|
void __ns_ref_active_get(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
int prev;
|
||||||
|
|
||||||
|
/* Initial namespaces are always active. */
|
||||||
|
if (is_ns_init_id(ns))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/* If we didn't resurrect the namespace we're done. */
|
||||||
|
prev = atomic_fetch_add(1, &ns->__ns_ref_active);
|
||||||
|
VFS_WARN_ON_ONCE(prev < 0);
|
||||||
|
if (likely(prev))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We did resurrect it. Walk the ownership hierarchy upwards
|
||||||
|
* until we find an owning user namespace that is active.
|
||||||
|
*/
|
||||||
|
for (;;) {
|
||||||
|
ns = ns_owner(ns);
|
||||||
|
if (!ns)
|
||||||
|
return;
|
||||||
|
|
||||||
|
VFS_WARN_ON_ONCE(is_ns_init_id(ns));
|
||||||
|
prev = atomic_fetch_add(1, &ns->__ns_ref_active);
|
||||||
|
VFS_WARN_ON_ONCE(prev < 0);
|
||||||
|
if (likely(prev))
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,7 @@
|
||||||
#include <linux/syscalls.h>
|
#include <linux/syscalls.h>
|
||||||
#include <linux/cgroup.h>
|
#include <linux/cgroup.h>
|
||||||
#include <linux/perf_event.h>
|
#include <linux/perf_event.h>
|
||||||
|
#include <linux/nstree.h>
|
||||||
|
|
||||||
static struct kmem_cache *nsproxy_cachep;
|
static struct kmem_cache *nsproxy_cachep;
|
||||||
|
|
||||||
|
|
@ -59,6 +60,25 @@ static inline struct nsproxy *create_nsproxy(void)
|
||||||
return nsproxy;
|
return nsproxy;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline void nsproxy_free(struct nsproxy *ns)
|
||||||
|
{
|
||||||
|
put_mnt_ns(ns->mnt_ns);
|
||||||
|
put_uts_ns(ns->uts_ns);
|
||||||
|
put_ipc_ns(ns->ipc_ns);
|
||||||
|
put_pid_ns(ns->pid_ns_for_children);
|
||||||
|
put_time_ns(ns->time_ns);
|
||||||
|
put_time_ns(ns->time_ns_for_children);
|
||||||
|
put_cgroup_ns(ns->cgroup_ns);
|
||||||
|
put_net(ns->net_ns);
|
||||||
|
kmem_cache_free(nsproxy_cachep, ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
void deactivate_nsproxy(struct nsproxy *ns)
|
||||||
|
{
|
||||||
|
nsproxy_ns_active_put(ns);
|
||||||
|
nsproxy_free(ns);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create new nsproxy and all of its associated namespaces.
|
* Create new nsproxy and all of its associated namespaces.
|
||||||
* Return the newly created nsproxy. Do not attach this to the task,
|
* Return the newly created nsproxy. Do not attach this to the task,
|
||||||
|
|
@ -179,23 +199,11 @@ int copy_namespaces(u64 flags, struct task_struct *tsk)
|
||||||
if ((flags & CLONE_VM) == 0)
|
if ((flags & CLONE_VM) == 0)
|
||||||
timens_on_fork(new_ns, tsk);
|
timens_on_fork(new_ns, tsk);
|
||||||
|
|
||||||
|
nsproxy_ns_active_get(new_ns);
|
||||||
tsk->nsproxy = new_ns;
|
tsk->nsproxy = new_ns;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void free_nsproxy(struct nsproxy *ns)
|
|
||||||
{
|
|
||||||
put_mnt_ns(ns->mnt_ns);
|
|
||||||
put_uts_ns(ns->uts_ns);
|
|
||||||
put_ipc_ns(ns->ipc_ns);
|
|
||||||
put_pid_ns(ns->pid_ns_for_children);
|
|
||||||
put_time_ns(ns->time_ns);
|
|
||||||
put_time_ns(ns->time_ns_for_children);
|
|
||||||
put_cgroup_ns(ns->cgroup_ns);
|
|
||||||
put_net(ns->net_ns);
|
|
||||||
kmem_cache_free(nsproxy_cachep, ns);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Called from unshare. Unshare all the namespaces part of nsproxy.
|
* Called from unshare. Unshare all the namespaces part of nsproxy.
|
||||||
* On success, returns the new nsproxy.
|
* On success, returns the new nsproxy.
|
||||||
|
|
@ -232,6 +240,9 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
|
||||||
|
|
||||||
might_sleep();
|
might_sleep();
|
||||||
|
|
||||||
|
if (new)
|
||||||
|
nsproxy_ns_active_get(new);
|
||||||
|
|
||||||
task_lock(p);
|
task_lock(p);
|
||||||
ns = p->nsproxy;
|
ns = p->nsproxy;
|
||||||
p->nsproxy = new;
|
p->nsproxy = new;
|
||||||
|
|
@ -241,11 +252,27 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
|
||||||
put_nsproxy(ns);
|
put_nsproxy(ns);
|
||||||
}
|
}
|
||||||
|
|
||||||
void exit_task_namespaces(struct task_struct *p)
|
void exit_nsproxy_namespaces(struct task_struct *p)
|
||||||
{
|
{
|
||||||
switch_task_namespaces(p, NULL);
|
switch_task_namespaces(p, NULL);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void switch_cred_namespaces(const struct cred *old, const struct cred *new)
|
||||||
|
{
|
||||||
|
ns_ref_active_get(new->user_ns);
|
||||||
|
ns_ref_active_put(old->user_ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
void get_cred_namespaces(struct task_struct *tsk)
|
||||||
|
{
|
||||||
|
ns_ref_active_get(tsk->real_cred->user_ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
void exit_cred_namespaces(struct task_struct *tsk)
|
||||||
|
{
|
||||||
|
ns_ref_active_put(tsk->real_cred->user_ns);
|
||||||
|
}
|
||||||
|
|
||||||
int exec_task_namespaces(void)
|
int exec_task_namespaces(void)
|
||||||
{
|
{
|
||||||
struct task_struct *tsk = current;
|
struct task_struct *tsk = current;
|
||||||
|
|
@ -315,7 +342,7 @@ static void put_nsset(struct nsset *nsset)
|
||||||
if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
|
if (nsset->fs && (flags & CLONE_NEWNS) && (flags & ~CLONE_NEWNS))
|
||||||
free_fs_struct(nsset->fs);
|
free_fs_struct(nsset->fs);
|
||||||
if (nsset->nsproxy)
|
if (nsset->nsproxy)
|
||||||
free_nsproxy(nsset->nsproxy);
|
nsproxy_free(nsset->nsproxy);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int prepare_nsset(unsigned flags, struct nsset *nsset)
|
static int prepare_nsset(unsigned flags, struct nsset *nsset)
|
||||||
|
|
|
||||||
784
kernel/nstree.c
784
kernel/nstree.c
|
|
@ -1,140 +1,261 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0-only
|
// SPDX-License-Identifier: GPL-2.0-only
|
||||||
|
/* Copyright (c) 2025 Christian Brauner <brauner@kernel.org> */
|
||||||
|
|
||||||
#include <linux/nstree.h>
|
#include <linux/nstree.h>
|
||||||
#include <linux/proc_ns.h>
|
#include <linux/proc_ns.h>
|
||||||
|
#include <linux/rculist.h>
|
||||||
#include <linux/vfsdebug.h>
|
#include <linux/vfsdebug.h>
|
||||||
|
#include <linux/syscalls.h>
|
||||||
|
#include <linux/user_namespace.h>
|
||||||
|
|
||||||
/**
|
static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock);
|
||||||
* struct ns_tree - Namespace tree
|
|
||||||
* @ns_tree: Rbtree of namespaces of a particular type
|
DEFINE_LOCK_GUARD_0(ns_tree_writer,
|
||||||
* @ns_list: Sequentially walkable list of all namespaces of this type
|
write_seqlock(&ns_tree_lock),
|
||||||
* @ns_tree_lock: Seqlock to protect the tree and list
|
write_sequnlock(&ns_tree_lock))
|
||||||
* @type: type of namespaces in this tree
|
|
||||||
*/
|
DEFINE_LOCK_GUARD_0(ns_tree_locked_reader,
|
||||||
struct ns_tree {
|
read_seqlock_excl(&ns_tree_lock),
|
||||||
struct rb_root ns_tree;
|
read_sequnlock_excl(&ns_tree_lock))
|
||||||
struct list_head ns_list;
|
|
||||||
seqlock_t ns_tree_lock;
|
static struct ns_tree_root ns_unified_root = { /* protected by ns_tree_lock */
|
||||||
int type;
|
.ns_rb = RB_ROOT,
|
||||||
|
.ns_list_head = LIST_HEAD_INIT(ns_unified_root.ns_list_head),
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ns_tree mnt_ns_tree = {
|
struct ns_tree_root mnt_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(mnt_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(mnt_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWNS,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ns_tree net_ns_tree = {
|
struct ns_tree_root net_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(net_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(net_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWNET,
|
|
||||||
};
|
};
|
||||||
EXPORT_SYMBOL_GPL(net_ns_tree);
|
EXPORT_SYMBOL_GPL(net_ns_tree);
|
||||||
|
|
||||||
struct ns_tree uts_ns_tree = {
|
struct ns_tree_root uts_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(uts_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(uts_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWUTS,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ns_tree user_ns_tree = {
|
struct ns_tree_root user_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(user_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(user_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWUSER,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ns_tree ipc_ns_tree = {
|
struct ns_tree_root ipc_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(ipc_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(ipc_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWIPC,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ns_tree pid_ns_tree = {
|
struct ns_tree_root pid_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(pid_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(pid_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWPID,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ns_tree cgroup_ns_tree = {
|
struct ns_tree_root cgroup_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(cgroup_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(cgroup_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWCGROUP,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ns_tree time_ns_tree = {
|
struct ns_tree_root time_ns_tree = {
|
||||||
.ns_tree = RB_ROOT,
|
.ns_rb = RB_ROOT,
|
||||||
.ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list),
|
.ns_list_head = LIST_HEAD_INIT(time_ns_tree.ns_list_head),
|
||||||
.ns_tree_lock = __SEQLOCK_UNLOCKED(time_ns_tree.ns_tree_lock),
|
|
||||||
.type = CLONE_NEWTIME,
|
|
||||||
};
|
};
|
||||||
|
|
||||||
DEFINE_COOKIE(namespace_cookie);
|
/**
|
||||||
|
* ns_tree_node_init - Initialize a namespace tree node
|
||||||
|
* @node: The node to initialize
|
||||||
|
*
|
||||||
|
* Initializes both the rbtree node and list entry.
|
||||||
|
*/
|
||||||
|
void ns_tree_node_init(struct ns_tree_node *node)
|
||||||
|
{
|
||||||
|
RB_CLEAR_NODE(&node->ns_node);
|
||||||
|
INIT_LIST_HEAD(&node->ns_list_entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ns_tree_root_init - Initialize a namespace tree root
|
||||||
|
* @root: The root to initialize
|
||||||
|
*
|
||||||
|
* Initializes both the rbtree root and list head.
|
||||||
|
*/
|
||||||
|
void ns_tree_root_init(struct ns_tree_root *root)
|
||||||
|
{
|
||||||
|
root->ns_rb = RB_ROOT;
|
||||||
|
INIT_LIST_HEAD(&root->ns_list_head);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ns_tree_node_empty - Check if a namespace tree node is empty
|
||||||
|
* @node: The node to check
|
||||||
|
*
|
||||||
|
* Returns true if the node is not in any tree.
|
||||||
|
*/
|
||||||
|
bool ns_tree_node_empty(const struct ns_tree_node *node)
|
||||||
|
{
|
||||||
|
return RB_EMPTY_NODE(&node->ns_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ns_tree_node_add - Add a node to a namespace tree
|
||||||
|
* @node: The node to add
|
||||||
|
* @root: The tree root to add to
|
||||||
|
* @cmp: Comparison function for rbtree insertion
|
||||||
|
*
|
||||||
|
* Adds the node to both the rbtree and the list, maintaining sorted order.
|
||||||
|
* The list is maintained in the same order as the rbtree to enable efficient
|
||||||
|
* iteration.
|
||||||
|
*
|
||||||
|
* Returns: NULL if insertion succeeded, existing node if duplicate found
|
||||||
|
*/
|
||||||
|
struct rb_node *ns_tree_node_add(struct ns_tree_node *node,
|
||||||
|
struct ns_tree_root *root,
|
||||||
|
int (*cmp)(struct rb_node *, const struct rb_node *))
|
||||||
|
{
|
||||||
|
struct rb_node *ret, *prev;
|
||||||
|
|
||||||
|
/* Add to rbtree */
|
||||||
|
ret = rb_find_add_rcu(&node->ns_node, &root->ns_rb, cmp);
|
||||||
|
|
||||||
|
/* Add to list in sorted order */
|
||||||
|
prev = rb_prev(&node->ns_node);
|
||||||
|
if (!prev) {
|
||||||
|
/* No previous node, add at head */
|
||||||
|
list_add_rcu(&node->ns_list_entry, &root->ns_list_head);
|
||||||
|
} else {
|
||||||
|
/* Add after previous node */
|
||||||
|
struct ns_tree_node *prev_node;
|
||||||
|
prev_node = rb_entry(prev, struct ns_tree_node, ns_node);
|
||||||
|
list_add_rcu(&node->ns_list_entry, &prev_node->ns_list_entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* ns_tree_node_del - Remove a node from a namespace tree
|
||||||
|
* @node: The node to remove
|
||||||
|
* @root: The tree root to remove from
|
||||||
|
*
|
||||||
|
* Removes the node from both the rbtree and the list atomically.
|
||||||
|
*/
|
||||||
|
void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root)
|
||||||
|
{
|
||||||
|
rb_erase(&node->ns_node, &root->ns_rb);
|
||||||
|
RB_CLEAR_NODE(&node->ns_node);
|
||||||
|
list_bidir_del_rcu(&node->ns_list_entry);
|
||||||
|
}
|
||||||
|
|
||||||
static inline struct ns_common *node_to_ns(const struct rb_node *node)
|
static inline struct ns_common *node_to_ns(const struct rb_node *node)
|
||||||
{
|
{
|
||||||
if (!node)
|
if (!node)
|
||||||
return NULL;
|
return NULL;
|
||||||
return rb_entry(node, struct ns_common, ns_tree_node);
|
return rb_entry(node, struct ns_common, ns_tree_node.ns_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline int ns_cmp(struct rb_node *a, const struct rb_node *b)
|
static inline struct ns_common *node_to_ns_unified(const struct rb_node *node)
|
||||||
{
|
{
|
||||||
struct ns_common *ns_a = node_to_ns(a);
|
if (!node)
|
||||||
struct ns_common *ns_b = node_to_ns(b);
|
return NULL;
|
||||||
u64 ns_id_a = ns_a->ns_id;
|
return rb_entry(node, struct ns_common, ns_unified_node.ns_node);
|
||||||
u64 ns_id_b = ns_b->ns_id;
|
}
|
||||||
|
|
||||||
if (ns_id_a < ns_id_b)
|
static inline struct ns_common *node_to_ns_owner(const struct rb_node *node)
|
||||||
|
{
|
||||||
|
if (!node)
|
||||||
|
return NULL;
|
||||||
|
return rb_entry(node, struct ns_common, ns_owner_node.ns_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ns_id_cmp(u64 id_a, u64 id_b)
|
||||||
|
{
|
||||||
|
if (id_a < id_b)
|
||||||
return -1;
|
return -1;
|
||||||
if (ns_id_a > ns_id_b)
|
if (id_a > id_b)
|
||||||
return 1;
|
return 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree)
|
static int ns_cmp(struct rb_node *a, const struct rb_node *b)
|
||||||
{
|
{
|
||||||
struct rb_node *node, *prev;
|
return ns_id_cmp(node_to_ns(a)->ns_id, node_to_ns(b)->ns_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ns_cmp_unified(struct rb_node *a, const struct rb_node *b)
|
||||||
|
{
|
||||||
|
return ns_id_cmp(node_to_ns_unified(a)->ns_id, node_to_ns_unified(b)->ns_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int ns_cmp_owner(struct rb_node *a, const struct rb_node *b)
|
||||||
|
{
|
||||||
|
return ns_id_cmp(node_to_ns_owner(a)->ns_id, node_to_ns_owner(b)->ns_id);
|
||||||
|
}
|
||||||
|
|
||||||
|
void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree)
|
||||||
|
{
|
||||||
|
struct rb_node *node;
|
||||||
|
const struct proc_ns_operations *ops = ns->ops;
|
||||||
|
|
||||||
VFS_WARN_ON_ONCE(!ns->ns_id);
|
VFS_WARN_ON_ONCE(!ns->ns_id);
|
||||||
|
|
||||||
write_seqlock(&ns_tree->ns_tree_lock);
|
guard(ns_tree_writer)();
|
||||||
|
|
||||||
VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
|
/* Add to per-type tree and list */
|
||||||
|
node = ns_tree_node_add(&ns->ns_tree_node, ns_tree, ns_cmp);
|
||||||
|
|
||||||
node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp);
|
/* Add to unified tree and list */
|
||||||
/*
|
ns_tree_node_add(&ns->ns_unified_node, &ns_unified_root, ns_cmp_unified);
|
||||||
* If there's no previous entry simply add it after the
|
|
||||||
* head and if there is add it after the previous entry.
|
|
||||||
*/
|
|
||||||
prev = rb_prev(&ns->ns_tree_node);
|
|
||||||
if (!prev)
|
|
||||||
list_add_rcu(&ns->ns_list_node, &ns_tree->ns_list);
|
|
||||||
else
|
|
||||||
list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node);
|
|
||||||
|
|
||||||
write_sequnlock(&ns_tree->ns_tree_lock);
|
/* Add to owner's tree if applicable */
|
||||||
|
if (ops) {
|
||||||
|
struct user_namespace *user_ns;
|
||||||
|
|
||||||
|
VFS_WARN_ON_ONCE(!ops->owner);
|
||||||
|
user_ns = ops->owner(ns);
|
||||||
|
if (user_ns) {
|
||||||
|
struct ns_common *owner = &user_ns->ns;
|
||||||
|
VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
|
||||||
|
|
||||||
|
/* Insert into owner's tree and list */
|
||||||
|
ns_tree_node_add(&ns->ns_owner_node, &owner->ns_owner_root, ns_cmp_owner);
|
||||||
|
} else {
|
||||||
|
/* Only the initial user namespace doesn't have an owner. */
|
||||||
|
VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
VFS_WARN_ON_ONCE(node);
|
VFS_WARN_ON_ONCE(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree)
|
void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree)
|
||||||
{
|
{
|
||||||
VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node));
|
const struct proc_ns_operations *ops = ns->ops;
|
||||||
VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node));
|
struct user_namespace *user_ns;
|
||||||
VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type);
|
|
||||||
|
|
||||||
write_seqlock(&ns_tree->ns_tree_lock);
|
VFS_WARN_ON_ONCE(ns_tree_node_empty(&ns->ns_tree_node));
|
||||||
rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree);
|
VFS_WARN_ON_ONCE(list_empty(&ns->ns_tree_node.ns_list_entry));
|
||||||
list_bidir_del_rcu(&ns->ns_list_node);
|
|
||||||
RB_CLEAR_NODE(&ns->ns_tree_node);
|
write_seqlock(&ns_tree_lock);
|
||||||
write_sequnlock(&ns_tree->ns_tree_lock);
|
|
||||||
|
/* Remove from per-type tree and list */
|
||||||
|
ns_tree_node_del(&ns->ns_tree_node, ns_tree);
|
||||||
|
|
||||||
|
/* Remove from unified tree and list */
|
||||||
|
ns_tree_node_del(&ns->ns_unified_node, &ns_unified_root);
|
||||||
|
|
||||||
|
/* Remove from owner's tree if applicable */
|
||||||
|
if (ops) {
|
||||||
|
user_ns = ops->owner(ns);
|
||||||
|
if (user_ns) {
|
||||||
|
struct ns_common *owner = &user_ns->ns;
|
||||||
|
ns_tree_node_del(&ns->ns_owner_node, &owner->ns_owner_root);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write_sequnlock(&ns_tree_lock);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(__ns_tree_remove);
|
EXPORT_SYMBOL_GPL(__ns_tree_remove);
|
||||||
|
|
||||||
|
|
@ -150,8 +271,19 @@ static int ns_find(const void *key, const struct rb_node *node)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int ns_find_unified(const void *key, const struct rb_node *node)
|
||||||
|
{
|
||||||
|
const u64 ns_id = *(u64 *)key;
|
||||||
|
const struct ns_common *ns = node_to_ns_unified(node);
|
||||||
|
|
||||||
static struct ns_tree *ns_tree_from_type(int ns_type)
|
if (ns_id < ns->ns_id)
|
||||||
|
return -1;
|
||||||
|
if (ns_id > ns->ns_id)
|
||||||
|
return 1;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ns_tree_root *ns_tree_from_type(int ns_type)
|
||||||
{
|
{
|
||||||
switch (ns_type) {
|
switch (ns_type) {
|
||||||
case CLONE_NEWCGROUP:
|
case CLONE_NEWCGROUP:
|
||||||
|
|
@ -175,73 +307,507 @@ static struct ns_tree *ns_tree_from_type(int ns_type)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
|
static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id)
|
||||||
{
|
{
|
||||||
struct ns_tree *ns_tree;
|
|
||||||
struct rb_node *node;
|
struct rb_node *node;
|
||||||
unsigned int seq;
|
unsigned int seq;
|
||||||
|
|
||||||
RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
|
do {
|
||||||
|
seq = read_seqbegin(&ns_tree_lock);
|
||||||
|
node = rb_find_rcu(&ns_id, &ns_unified_root.ns_rb, ns_find_unified);
|
||||||
|
if (node)
|
||||||
|
break;
|
||||||
|
} while (read_seqretry(&ns_tree_lock, seq));
|
||||||
|
|
||||||
|
return node_to_ns_unified(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type)
|
||||||
|
{
|
||||||
|
struct ns_tree_root *ns_tree;
|
||||||
|
struct rb_node *node;
|
||||||
|
unsigned int seq;
|
||||||
|
|
||||||
ns_tree = ns_tree_from_type(ns_type);
|
ns_tree = ns_tree_from_type(ns_type);
|
||||||
if (!ns_tree)
|
if (!ns_tree)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
seq = read_seqbegin(&ns_tree->ns_tree_lock);
|
seq = read_seqbegin(&ns_tree_lock);
|
||||||
node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find);
|
node = rb_find_rcu(&ns_id, &ns_tree->ns_rb, ns_find);
|
||||||
if (node)
|
if (node)
|
||||||
break;
|
break;
|
||||||
} while (read_seqretry(&ns_tree->ns_tree_lock, seq));
|
} while (read_seqretry(&ns_tree_lock, seq));
|
||||||
|
|
||||||
if (!node)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
VFS_WARN_ON_ONCE(node_to_ns(node)->ns_type != ns_type);
|
|
||||||
|
|
||||||
return node_to_ns(node);
|
return node_to_ns(node);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type)
|
||||||
|
{
|
||||||
|
RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_lookup_rcu() usage");
|
||||||
|
|
||||||
|
if (ns_type)
|
||||||
|
return __ns_tree_lookup_rcu(ns_id, ns_type);
|
||||||
|
|
||||||
|
return __ns_unified_tree_lookup_rcu(ns_id);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ns_tree_adjoined_rcu - find the next/previous namespace in the same
|
* __ns_tree_adjoined_rcu - find the next/previous namespace in the same
|
||||||
* tree
|
* tree
|
||||||
* @ns: namespace to start from
|
* @ns: namespace to start from
|
||||||
|
* @ns_tree: namespace tree to search in
|
||||||
* @previous: if true find the previous namespace, otherwise the next
|
* @previous: if true find the previous namespace, otherwise the next
|
||||||
*
|
*
|
||||||
* Find the next or previous namespace in the same tree as @ns. If
|
* Find the next or previous namespace in the same tree as @ns. If
|
||||||
* there is no next/previous namespace, -ENOENT is returned.
|
* there is no next/previous namespace, -ENOENT is returned.
|
||||||
*/
|
*/
|
||||||
struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
|
struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns,
|
||||||
struct ns_tree *ns_tree, bool previous)
|
struct ns_tree_root *ns_tree, bool previous)
|
||||||
{
|
{
|
||||||
struct list_head *list;
|
struct list_head *list;
|
||||||
|
|
||||||
RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage");
|
RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage");
|
||||||
|
|
||||||
if (previous)
|
if (previous)
|
||||||
list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_list_node));
|
list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_tree_node.ns_list_entry));
|
||||||
else
|
else
|
||||||
list = rcu_dereference(list_next_rcu(&ns->ns_list_node));
|
list = rcu_dereference(list_next_rcu(&ns->ns_tree_node.ns_list_entry));
|
||||||
if (list_is_head(list, &ns_tree->ns_list))
|
if (list_is_head(list, &ns_tree->ns_list_head))
|
||||||
return ERR_PTR(-ENOENT);
|
return ERR_PTR(-ENOENT);
|
||||||
|
|
||||||
VFS_WARN_ON_ONCE(list_entry_rcu(list, struct ns_common, ns_list_node)->ns_type != ns_tree->type);
|
return list_entry_rcu(list, struct ns_common, ns_tree_node.ns_list_entry);
|
||||||
|
|
||||||
return list_entry_rcu(list, struct ns_common, ns_list_node);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* ns_tree_gen_id - generate a new namespace id
|
* __ns_tree_gen_id - generate a new namespace id
|
||||||
* @ns: namespace to generate id for
|
* @ns: namespace to generate id for
|
||||||
|
* @id: if non-zero, this is the initial namespace and this is a fixed id
|
||||||
*
|
*
|
||||||
* Generates a new namespace id and assigns it to the namespace. All
|
* Generates a new namespace id and assigns it to the namespace. All
|
||||||
* namespaces types share the same id space and thus can be compared
|
* namespaces types share the same id space and thus can be compared
|
||||||
* directly. IOW, when two ids of two namespace are equal, they are
|
* directly. IOW, when two ids of two namespace are equal, they are
|
||||||
* identical.
|
* identical.
|
||||||
*/
|
*/
|
||||||
u64 ns_tree_gen_id(struct ns_common *ns)
|
u64 __ns_tree_gen_id(struct ns_common *ns, u64 id)
|
||||||
{
|
{
|
||||||
guard(preempt)();
|
static atomic64_t namespace_cookie = ATOMIC64_INIT(NS_LAST_INIT_ID + 1);
|
||||||
ns->ns_id = gen_cookie_next(&namespace_cookie);
|
|
||||||
|
if (id)
|
||||||
|
ns->ns_id = id;
|
||||||
|
else
|
||||||
|
ns->ns_id = atomic64_inc_return(&namespace_cookie);
|
||||||
return ns->ns_id;
|
return ns->ns_id;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
struct klistns {
|
||||||
|
u64 __user *uns_ids;
|
||||||
|
u32 nr_ns_ids;
|
||||||
|
u64 last_ns_id;
|
||||||
|
u64 user_ns_id;
|
||||||
|
u32 ns_type;
|
||||||
|
struct user_namespace *user_ns;
|
||||||
|
bool userns_capable;
|
||||||
|
struct ns_common *first_ns;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void __free_klistns_free(const struct klistns *kls)
|
||||||
|
{
|
||||||
|
if (kls->user_ns_id != LISTNS_CURRENT_USER)
|
||||||
|
put_user_ns(kls->user_ns);
|
||||||
|
if (kls->first_ns && kls->first_ns->ops)
|
||||||
|
kls->first_ns->ops->put(kls->first_ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define NS_ALL (PID_NS | USER_NS | MNT_NS | UTS_NS | IPC_NS | NET_NS | CGROUP_NS | TIME_NS)
|
||||||
|
|
||||||
|
static int copy_ns_id_req(const struct ns_id_req __user *req,
|
||||||
|
struct ns_id_req *kreq)
|
||||||
|
{
|
||||||
|
int ret;
|
||||||
|
size_t usize;
|
||||||
|
|
||||||
|
BUILD_BUG_ON(sizeof(struct ns_id_req) != NS_ID_REQ_SIZE_VER0);
|
||||||
|
|
||||||
|
ret = get_user(usize, &req->size);
|
||||||
|
if (ret)
|
||||||
|
return -EFAULT;
|
||||||
|
if (unlikely(usize > PAGE_SIZE))
|
||||||
|
return -E2BIG;
|
||||||
|
if (unlikely(usize < NS_ID_REQ_SIZE_VER0))
|
||||||
|
return -EINVAL;
|
||||||
|
memset(kreq, 0, sizeof(*kreq));
|
||||||
|
ret = copy_struct_from_user(kreq, sizeof(*kreq), req, usize);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
if (kreq->spare != 0)
|
||||||
|
return -EINVAL;
|
||||||
|
if (kreq->ns_type & ~NS_ALL)
|
||||||
|
return -EOPNOTSUPP;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int prepare_klistns(struct klistns *kls, struct ns_id_req *kreq,
|
||||||
|
u64 __user *ns_ids, size_t nr_ns_ids)
|
||||||
|
{
|
||||||
|
kls->last_ns_id = kreq->ns_id;
|
||||||
|
kls->user_ns_id = kreq->user_ns_id;
|
||||||
|
kls->nr_ns_ids = nr_ns_ids;
|
||||||
|
kls->ns_type = kreq->ns_type;
|
||||||
|
kls->uns_ids = ns_ids;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Lookup a namespace owned by owner with id >= ns_id.
|
||||||
|
* Returns the namespace with the smallest id that is >= ns_id.
|
||||||
|
*/
|
||||||
|
static struct ns_common *lookup_ns_owner_at(u64 ns_id, struct ns_common *owner)
|
||||||
|
{
|
||||||
|
struct ns_common *ret = NULL;
|
||||||
|
struct rb_node *node;
|
||||||
|
|
||||||
|
VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER);
|
||||||
|
|
||||||
|
guard(ns_tree_locked_reader)();
|
||||||
|
|
||||||
|
node = owner->ns_owner_root.ns_rb.rb_node;
|
||||||
|
while (node) {
|
||||||
|
struct ns_common *ns;
|
||||||
|
|
||||||
|
ns = node_to_ns_owner(node);
|
||||||
|
if (ns_id <= ns->ns_id) {
|
||||||
|
ret = ns;
|
||||||
|
if (ns_id == ns->ns_id)
|
||||||
|
break;
|
||||||
|
node = node->rb_left;
|
||||||
|
} else {
|
||||||
|
node = node->rb_right;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
ret = ns_get_unless_inactive(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct ns_common *lookup_ns_id(u64 mnt_ns_id, int ns_type)
|
||||||
|
{
|
||||||
|
struct ns_common *ns;
|
||||||
|
|
||||||
|
guard(rcu)();
|
||||||
|
ns = ns_tree_lookup_rcu(mnt_ns_id, ns_type);
|
||||||
|
if (!ns)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!ns_get_unless_inactive(ns))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool __must_check ns_requested(const struct klistns *kls,
|
||||||
|
const struct ns_common *ns)
|
||||||
|
{
|
||||||
|
return !kls->ns_type || (kls->ns_type & ns->ns_type);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool __must_check may_list_ns(const struct klistns *kls,
|
||||||
|
struct ns_common *ns)
|
||||||
|
{
|
||||||
|
if (kls->user_ns) {
|
||||||
|
if (kls->userns_capable)
|
||||||
|
return true;
|
||||||
|
} else {
|
||||||
|
struct ns_common *owner;
|
||||||
|
struct user_namespace *user_ns;
|
||||||
|
|
||||||
|
owner = ns_owner(ns);
|
||||||
|
if (owner)
|
||||||
|
user_ns = to_user_ns(owner);
|
||||||
|
else
|
||||||
|
user_ns = &init_user_ns;
|
||||||
|
if (ns_capable_noaudit(user_ns, CAP_SYS_ADMIN))
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (is_current_namespace(ns))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
if (ns->ns_type != CLONE_NEWUSER)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
if (ns_capable_noaudit(to_user_ns(ns), CAP_SYS_ADMIN))
|
||||||
|
return true;
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void ns_put(struct ns_common *ns)
|
||||||
|
{
|
||||||
|
if (ns && ns->ops)
|
||||||
|
ns->ops->put(ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
DEFINE_FREE(ns_put, struct ns_common *, if (!IS_ERR_OR_NULL(_T)) ns_put(_T))
|
||||||
|
|
||||||
|
static inline struct ns_common *__must_check legitimize_ns(const struct klistns *kls,
|
||||||
|
struct ns_common *candidate)
|
||||||
|
{
|
||||||
|
struct ns_common *ns __free(ns_put) = NULL;
|
||||||
|
|
||||||
|
if (!ns_requested(kls, candidate))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
ns = ns_get_unless_inactive(candidate);
|
||||||
|
if (!ns)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
if (!may_list_ns(kls, ns))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
return no_free_ptr(ns);
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t do_listns_userns(struct klistns *kls)
|
||||||
|
{
|
||||||
|
u64 __user *ns_ids = kls->uns_ids;
|
||||||
|
size_t nr_ns_ids = kls->nr_ns_ids;
|
||||||
|
struct ns_common *ns = NULL, *first_ns = NULL, *prev = NULL;
|
||||||
|
const struct list_head *head;
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
VFS_WARN_ON_ONCE(!kls->user_ns_id);
|
||||||
|
|
||||||
|
if (kls->user_ns_id == LISTNS_CURRENT_USER)
|
||||||
|
ns = to_ns_common(current_user_ns());
|
||||||
|
else if (kls->user_ns_id)
|
||||||
|
ns = lookup_ns_id(kls->user_ns_id, CLONE_NEWUSER);
|
||||||
|
if (!ns)
|
||||||
|
return -EINVAL;
|
||||||
|
kls->user_ns = to_user_ns(ns);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Use the rbtree to find the first namespace we care about and
|
||||||
|
* then use it's list entry to iterate from there.
|
||||||
|
*/
|
||||||
|
if (kls->last_ns_id) {
|
||||||
|
kls->first_ns = lookup_ns_owner_at(kls->last_ns_id + 1, ns);
|
||||||
|
if (!kls->first_ns)
|
||||||
|
return -ENOENT;
|
||||||
|
first_ns = kls->first_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head;
|
||||||
|
kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN);
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
|
||||||
|
if (!first_ns)
|
||||||
|
first_ns = list_entry_rcu(head->next, typeof(*first_ns), ns_owner_node.ns_list_entry);
|
||||||
|
|
||||||
|
ns = first_ns;
|
||||||
|
list_for_each_entry_from_rcu(ns, head, ns_owner_node.ns_list_entry) {
|
||||||
|
struct ns_common *valid;
|
||||||
|
|
||||||
|
if (!nr_ns_ids)
|
||||||
|
break;
|
||||||
|
|
||||||
|
valid = legitimize_ns(kls, ns);
|
||||||
|
if (!valid)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
ns_put(prev);
|
||||||
|
prev = valid;
|
||||||
|
|
||||||
|
if (put_user(valid->ns_id, ns_ids + ret)) {
|
||||||
|
ns_put(prev);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
nr_ns_ids--;
|
||||||
|
ret++;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
rcu_read_unlock();
|
||||||
|
ns_put(prev);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Lookup a namespace with id >= ns_id in either the unified tree or a type-specific tree.
|
||||||
|
* Returns the namespace with the smallest id that is >= ns_id.
|
||||||
|
*/
|
||||||
|
static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type)
|
||||||
|
{
|
||||||
|
struct ns_common *ret = NULL;
|
||||||
|
struct ns_tree_root *ns_tree = NULL;
|
||||||
|
struct rb_node *node;
|
||||||
|
|
||||||
|
if (ns_type) {
|
||||||
|
ns_tree = ns_tree_from_type(ns_type);
|
||||||
|
if (!ns_tree)
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
guard(ns_tree_locked_reader)();
|
||||||
|
|
||||||
|
if (ns_tree)
|
||||||
|
node = ns_tree->ns_rb.rb_node;
|
||||||
|
else
|
||||||
|
node = ns_unified_root.ns_rb.rb_node;
|
||||||
|
|
||||||
|
while (node) {
|
||||||
|
struct ns_common *ns;
|
||||||
|
|
||||||
|
if (ns_type)
|
||||||
|
ns = node_to_ns(node);
|
||||||
|
else
|
||||||
|
ns = node_to_ns_unified(node);
|
||||||
|
|
||||||
|
if (ns_id <= ns->ns_id) {
|
||||||
|
if (ns_type)
|
||||||
|
ret = node_to_ns(node);
|
||||||
|
else
|
||||||
|
ret = node_to_ns_unified(node);
|
||||||
|
if (ns_id == ns->ns_id)
|
||||||
|
break;
|
||||||
|
node = node->rb_left;
|
||||||
|
} else {
|
||||||
|
node = node->rb_right;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret)
|
||||||
|
ret = ns_get_unless_inactive(ret);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct ns_common *first_ns_common(const struct list_head *head,
|
||||||
|
struct ns_tree_root *ns_tree)
|
||||||
|
{
|
||||||
|
if (ns_tree)
|
||||||
|
return list_entry_rcu(head->next, struct ns_common, ns_tree_node.ns_list_entry);
|
||||||
|
return list_entry_rcu(head->next, struct ns_common, ns_unified_node.ns_list_entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct ns_common *next_ns_common(struct ns_common *ns,
|
||||||
|
struct ns_tree_root *ns_tree)
|
||||||
|
{
|
||||||
|
if (ns_tree)
|
||||||
|
return list_entry_rcu(ns->ns_tree_node.ns_list_entry.next, struct ns_common, ns_tree_node.ns_list_entry);
|
||||||
|
return list_entry_rcu(ns->ns_unified_node.ns_list_entry.next, struct ns_common, ns_unified_node.ns_list_entry);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline bool ns_common_is_head(struct ns_common *ns,
|
||||||
|
const struct list_head *head,
|
||||||
|
struct ns_tree_root *ns_tree)
|
||||||
|
{
|
||||||
|
if (ns_tree)
|
||||||
|
return &ns->ns_tree_node.ns_list_entry == head;
|
||||||
|
return &ns->ns_unified_node.ns_list_entry == head;
|
||||||
|
}
|
||||||
|
|
||||||
|
static ssize_t do_listns(struct klistns *kls)
|
||||||
|
{
|
||||||
|
u64 __user *ns_ids = kls->uns_ids;
|
||||||
|
size_t nr_ns_ids = kls->nr_ns_ids;
|
||||||
|
struct ns_common *ns, *first_ns = NULL, *prev = NULL;
|
||||||
|
struct ns_tree_root *ns_tree = NULL;
|
||||||
|
const struct list_head *head;
|
||||||
|
u32 ns_type;
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
if (hweight32(kls->ns_type) == 1)
|
||||||
|
ns_type = kls->ns_type;
|
||||||
|
else
|
||||||
|
ns_type = 0;
|
||||||
|
|
||||||
|
if (ns_type) {
|
||||||
|
ns_tree = ns_tree_from_type(ns_type);
|
||||||
|
if (!ns_tree)
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (kls->last_ns_id) {
|
||||||
|
kls->first_ns = lookup_ns_id_at(kls->last_ns_id + 1, ns_type);
|
||||||
|
if (!kls->first_ns)
|
||||||
|
return -ENOENT;
|
||||||
|
first_ns = kls->first_ns;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = 0;
|
||||||
|
if (ns_tree)
|
||||||
|
head = &ns_tree->ns_list_head;
|
||||||
|
else
|
||||||
|
head = &ns_unified_root.ns_list_head;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
|
||||||
|
if (!first_ns)
|
||||||
|
first_ns = first_ns_common(head, ns_tree);
|
||||||
|
|
||||||
|
for (ns = first_ns; !ns_common_is_head(ns, head, ns_tree) && nr_ns_ids;
|
||||||
|
ns = next_ns_common(ns, ns_tree)) {
|
||||||
|
struct ns_common *valid;
|
||||||
|
|
||||||
|
valid = legitimize_ns(kls, ns);
|
||||||
|
if (!valid)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
ns_put(prev);
|
||||||
|
prev = valid;
|
||||||
|
|
||||||
|
if (put_user(valid->ns_id, ns_ids + ret)) {
|
||||||
|
ns_put(prev);
|
||||||
|
return -EFAULT;
|
||||||
|
}
|
||||||
|
|
||||||
|
nr_ns_ids--;
|
||||||
|
ret++;
|
||||||
|
|
||||||
|
rcu_read_lock();
|
||||||
|
}
|
||||||
|
|
||||||
|
rcu_read_unlock();
|
||||||
|
ns_put(prev);
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
SYSCALL_DEFINE4(listns, const struct ns_id_req __user *, req,
|
||||||
|
u64 __user *, ns_ids, size_t, nr_ns_ids, unsigned int, flags)
|
||||||
|
{
|
||||||
|
struct klistns klns __free(klistns_free) = {};
|
||||||
|
const size_t maxcount = 1000000;
|
||||||
|
struct ns_id_req kreq;
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
if (flags)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
if (unlikely(nr_ns_ids > maxcount))
|
||||||
|
return -EOVERFLOW;
|
||||||
|
|
||||||
|
if (!access_ok(ns_ids, nr_ns_ids * sizeof(*ns_ids)))
|
||||||
|
return -EFAULT;
|
||||||
|
|
||||||
|
ret = copy_ns_id_req(req, &kreq);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
ret = prepare_klistns(&klns, &kreq, ns_ids, nr_ns_ids);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
|
if (kreq.user_ns_id)
|
||||||
|
return do_listns_userns(&klns);
|
||||||
|
|
||||||
|
return do_listns(&klns);
|
||||||
|
}
|
||||||
|
|
|
||||||
12
kernel/pid.c
12
kernel/pid.c
|
|
@ -71,21 +71,16 @@ static int pid_max_max = PID_MAX_LIMIT;
|
||||||
* the scheme scales to up to 4 million PIDs, runtime.
|
* the scheme scales to up to 4 million PIDs, runtime.
|
||||||
*/
|
*/
|
||||||
struct pid_namespace init_pid_ns = {
|
struct pid_namespace init_pid_ns = {
|
||||||
.ns.__ns_ref = REFCOUNT_INIT(2),
|
.ns = NS_COMMON_INIT(init_pid_ns),
|
||||||
.idr = IDR_INIT(init_pid_ns.idr),
|
.idr = IDR_INIT(init_pid_ns.idr),
|
||||||
.pid_allocated = PIDNS_ADDING,
|
.pid_allocated = PIDNS_ADDING,
|
||||||
.level = 0,
|
.level = 0,
|
||||||
.child_reaper = &init_task,
|
.child_reaper = &init_task,
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
.ns.inum = ns_init_inum(&init_pid_ns),
|
|
||||||
#ifdef CONFIG_PID_NS
|
|
||||||
.ns.ops = &pidns_operations,
|
|
||||||
#endif
|
|
||||||
.pid_max = PID_MAX_DEFAULT,
|
.pid_max = PID_MAX_DEFAULT,
|
||||||
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
|
#if defined(CONFIG_SYSCTL) && defined(CONFIG_MEMFD_CREATE)
|
||||||
.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
|
.memfd_noexec_scope = MEMFD_NOEXEC_SCOPE_EXEC,
|
||||||
#endif
|
#endif
|
||||||
.ns.ns_type = ns_common_type(&init_pid_ns),
|
|
||||||
};
|
};
|
||||||
EXPORT_SYMBOL_GPL(init_pid_ns);
|
EXPORT_SYMBOL_GPL(init_pid_ns);
|
||||||
|
|
||||||
|
|
@ -117,9 +112,13 @@ static void delayed_put_pid(struct rcu_head *rhp)
|
||||||
void free_pid(struct pid *pid)
|
void free_pid(struct pid *pid)
|
||||||
{
|
{
|
||||||
int i;
|
int i;
|
||||||
|
struct pid_namespace *active_ns;
|
||||||
|
|
||||||
lockdep_assert_not_held(&tasklist_lock);
|
lockdep_assert_not_held(&tasklist_lock);
|
||||||
|
|
||||||
|
active_ns = pid->numbers[pid->level].ns;
|
||||||
|
ns_ref_active_put(active_ns);
|
||||||
|
|
||||||
spin_lock(&pidmap_lock);
|
spin_lock(&pidmap_lock);
|
||||||
for (i = 0; i <= pid->level; i++) {
|
for (i = 0; i <= pid->level; i++) {
|
||||||
struct upid *upid = pid->numbers + i;
|
struct upid *upid = pid->numbers + i;
|
||||||
|
|
@ -283,6 +282,7 @@ struct pid *alloc_pid(struct pid_namespace *ns, pid_t *set_tid,
|
||||||
}
|
}
|
||||||
spin_unlock(&pidmap_lock);
|
spin_unlock(&pidmap_lock);
|
||||||
idr_preload_end();
|
idr_preload_end();
|
||||||
|
ns_ref_active_get(ns);
|
||||||
|
|
||||||
return pid;
|
return pid;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -184,7 +184,7 @@ struct pid_namespace *copy_pid_ns(u64 flags,
|
||||||
|
|
||||||
void put_pid_ns(struct pid_namespace *ns)
|
void put_pid_ns(struct pid_namespace *ns)
|
||||||
{
|
{
|
||||||
if (ns && ns != &init_pid_ns && ns_ref_put(ns))
|
if (ns && ns_ref_put(ns))
|
||||||
schedule_work(&ns->work);
|
schedule_work(&ns->work);
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(put_pid_ns);
|
EXPORT_SYMBOL_GPL(put_pid_ns);
|
||||||
|
|
|
||||||
|
|
@ -478,11 +478,8 @@ const struct proc_ns_operations timens_for_children_operations = {
|
||||||
};
|
};
|
||||||
|
|
||||||
struct time_namespace init_time_ns = {
|
struct time_namespace init_time_ns = {
|
||||||
.ns.ns_type = ns_common_type(&init_time_ns),
|
.ns = NS_COMMON_INIT(init_time_ns),
|
||||||
.ns.__ns_ref = REFCOUNT_INIT(3),
|
|
||||||
.user_ns = &init_user_ns,
|
.user_ns = &init_user_ns,
|
||||||
.ns.inum = ns_init_inum(&init_time_ns),
|
|
||||||
.ns.ops = &timens_operations,
|
|
||||||
.frozen_offsets = true,
|
.frozen_offsets = true,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ EXPORT_SYMBOL_GPL(init_binfmt_misc);
|
||||||
* and 1 for... ?
|
* and 1 for... ?
|
||||||
*/
|
*/
|
||||||
struct user_namespace init_user_ns = {
|
struct user_namespace init_user_ns = {
|
||||||
|
.ns = NS_COMMON_INIT(init_user_ns),
|
||||||
.uid_map = {
|
.uid_map = {
|
||||||
{
|
{
|
||||||
.extent[0] = {
|
.extent[0] = {
|
||||||
|
|
@ -65,14 +66,8 @@ struct user_namespace init_user_ns = {
|
||||||
.nr_extents = 1,
|
.nr_extents = 1,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
.ns.ns_type = ns_common_type(&init_user_ns),
|
|
||||||
.ns.__ns_ref = REFCOUNT_INIT(3),
|
|
||||||
.owner = GLOBAL_ROOT_UID,
|
.owner = GLOBAL_ROOT_UID,
|
||||||
.group = GLOBAL_ROOT_GID,
|
.group = GLOBAL_ROOT_GID,
|
||||||
.ns.inum = ns_init_inum(&init_user_ns),
|
|
||||||
#ifdef CONFIG_USER_NS
|
|
||||||
.ns.ops = &userns_operations,
|
|
||||||
#endif
|
|
||||||
.flags = USERNS_INIT_FLAGS,
|
.flags = USERNS_INIT_FLAGS,
|
||||||
#ifdef CONFIG_KEYS
|
#ifdef CONFIG_KEYS
|
||||||
.keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
|
.keyring_name_list = LIST_HEAD_INIT(init_user_ns.keyring_name_list),
|
||||||
|
|
|
||||||
|
|
@ -439,7 +439,7 @@ static __net_init int setup_net(struct net *net)
|
||||||
LIST_HEAD(net_exit_list);
|
LIST_HEAD(net_exit_list);
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
net->net_cookie = ns_tree_gen_id(&net->ns);
|
net->net_cookie = ns_tree_gen_id(net);
|
||||||
|
|
||||||
list_for_each_entry(ops, &pernet_list, list) {
|
list_for_each_entry(ops, &pernet_list, list) {
|
||||||
error = ops_init(ops, net);
|
error = ops_init(ops, net);
|
||||||
|
|
|
||||||
|
|
@ -410,3 +410,4 @@
|
||||||
467 common open_tree_attr sys_open_tree_attr
|
467 common open_tree_attr sys_open_tree_attr
|
||||||
468 common file_getattr sys_file_getattr
|
468 common file_getattr sys_file_getattr
|
||||||
469 common file_setattr sys_file_setattr
|
469 common file_setattr sys_file_setattr
|
||||||
|
470 common listns sys_listns
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,76 @@ enum init_ns_ino {
|
||||||
TIME_NS_INIT_INO = 0xEFFFFFFAU,
|
TIME_NS_INIT_INO = 0xEFFFFFFAU,
|
||||||
NET_NS_INIT_INO = 0xEFFFFFF9U,
|
NET_NS_INIT_INO = 0xEFFFFFF9U,
|
||||||
MNT_NS_INIT_INO = 0xEFFFFFF8U,
|
MNT_NS_INIT_INO = 0xEFFFFFF8U,
|
||||||
|
#ifdef __KERNEL__
|
||||||
|
MNT_NS_ANON_INO = 0xEFFFFFF7U,
|
||||||
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct nsfs_file_handle {
|
||||||
|
__u64 ns_id;
|
||||||
|
__u32 ns_type;
|
||||||
|
__u32 ns_inum;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */
|
||||||
|
#define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */
|
||||||
|
|
||||||
|
enum init_ns_id {
|
||||||
|
IPC_NS_INIT_ID = 1ULL,
|
||||||
|
UTS_NS_INIT_ID = 2ULL,
|
||||||
|
USER_NS_INIT_ID = 3ULL,
|
||||||
|
PID_NS_INIT_ID = 4ULL,
|
||||||
|
CGROUP_NS_INIT_ID = 5ULL,
|
||||||
|
TIME_NS_INIT_ID = 6ULL,
|
||||||
|
NET_NS_INIT_ID = 7ULL,
|
||||||
|
MNT_NS_INIT_ID = 8ULL,
|
||||||
|
#ifdef __KERNEL__
|
||||||
|
NS_LAST_INIT_ID = MNT_NS_INIT_ID,
|
||||||
|
#endif
|
||||||
|
};
|
||||||
|
|
||||||
|
enum ns_type {
|
||||||
|
TIME_NS = (1ULL << 7), /* CLONE_NEWTIME */
|
||||||
|
MNT_NS = (1ULL << 17), /* CLONE_NEWNS */
|
||||||
|
CGROUP_NS = (1ULL << 25), /* CLONE_NEWCGROUP */
|
||||||
|
UTS_NS = (1ULL << 26), /* CLONE_NEWUTS */
|
||||||
|
IPC_NS = (1ULL << 27), /* CLONE_NEWIPC */
|
||||||
|
USER_NS = (1ULL << 28), /* CLONE_NEWUSER */
|
||||||
|
PID_NS = (1ULL << 29), /* CLONE_NEWPID */
|
||||||
|
NET_NS = (1ULL << 30), /* CLONE_NEWNET */
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct ns_id_req - namespace ID request structure
|
||||||
|
* @size: size of this structure
|
||||||
|
* @spare: reserved for future use
|
||||||
|
* @filter: filter mask
|
||||||
|
* @ns_id: last namespace id
|
||||||
|
* @user_ns_id: owning user namespace ID
|
||||||
|
*
|
||||||
|
* Structure for passing namespace ID and miscellaneous parameters to
|
||||||
|
* statns(2) and listns(2).
|
||||||
|
*
|
||||||
|
* For statns(2) @param represents the request mask.
|
||||||
|
* For listns(2) @param represents the last listed mount id (or zero).
|
||||||
|
*/
|
||||||
|
struct ns_id_req {
|
||||||
|
__u32 size;
|
||||||
|
__u32 spare;
|
||||||
|
__u64 ns_id;
|
||||||
|
struct /* listns */ {
|
||||||
|
__u32 ns_type;
|
||||||
|
__u32 spare2;
|
||||||
|
__u64 user_ns_id;
|
||||||
|
};
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Special @user_ns_id value that can be passed to listns()
|
||||||
|
*/
|
||||||
|
#define LISTNS_CURRENT_USER 0xffffffffffffffff /* Caller's userns */
|
||||||
|
|
||||||
|
/* List of all ns_id_req versions. */
|
||||||
|
#define NS_ID_REQ_SIZE_VER0 32 /* sizeof first published struct */
|
||||||
|
|
||||||
#endif /* __LINUX_NSFS_H */
|
#endif /* __LINUX_NSFS_H */
|
||||||
|
|
|
||||||
|
|
@ -487,7 +487,7 @@ int setup_userns(void)
|
||||||
uid_t uid = getuid();
|
uid_t uid = getuid();
|
||||||
gid_t gid = getgid();
|
gid_t gid = getgid();
|
||||||
|
|
||||||
ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
|
ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
|
||||||
if (ret) {
|
if (ret) {
|
||||||
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
|
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
|
||||||
strerror(errno));
|
strerror(errno));
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,12 @@
|
||||||
nsid_test
|
nsid_test
|
||||||
file_handle_test
|
file_handle_test
|
||||||
init_ino_test
|
init_ino_test
|
||||||
|
ns_active_ref_test
|
||||||
|
listns_test
|
||||||
|
listns_permissions_test
|
||||||
|
listns_efault_test
|
||||||
|
siocgskns_test
|
||||||
|
cred_change_test
|
||||||
|
stress_test
|
||||||
|
listns_pagination_bug
|
||||||
|
regression_pidfd_setns_test
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,29 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0-only
|
# SPDX-License-Identifier: GPL-2.0-only
|
||||||
CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
|
CFLAGS += -Wall -O0 -g $(KHDR_INCLUDES) $(TOOLS_INCLUDES)
|
||||||
|
LDLIBS += -lcap
|
||||||
|
|
||||||
TEST_GEN_PROGS := nsid_test file_handle_test init_ino_test
|
TEST_GEN_PROGS := nsid_test \
|
||||||
|
file_handle_test \
|
||||||
|
init_ino_test \
|
||||||
|
ns_active_ref_test \
|
||||||
|
listns_test \
|
||||||
|
listns_permissions_test \
|
||||||
|
listns_efault_test \
|
||||||
|
siocgskns_test \
|
||||||
|
cred_change_test \
|
||||||
|
stress_test \
|
||||||
|
listns_pagination_bug \
|
||||||
|
regression_pidfd_setns_test
|
||||||
|
|
||||||
include ../lib.mk
|
include ../lib.mk
|
||||||
|
|
||||||
|
$(OUTPUT)/ns_active_ref_test: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/listns_test: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/listns_permissions_test: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/listns_efault_test: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/siocgskns_test: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/cred_change_test: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/stress_test: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/listns_pagination_bug: ../filesystems/utils.c
|
||||||
|
$(OUTPUT)/regression_pidfd_setns_test: ../filesystems/utils.c
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,814 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/capability.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <linux/nsfs.h>
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
#include "../filesystems/utils.h"
|
||||||
|
#include "wrappers.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test credential changes and their impact on namespace active references.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test setuid() in a user namespace properly swaps active references.
|
||||||
|
* Create a user namespace with multiple UIDs mapped, then setuid() between them.
|
||||||
|
* Verify that the user namespace remains active throughout.
|
||||||
|
*/
|
||||||
|
TEST(setuid_preserves_active_refs)
|
||||||
|
{
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
__u64 userns_id;
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[256];
|
||||||
|
ssize_t ret;
|
||||||
|
int i;
|
||||||
|
bool found = false;
|
||||||
|
int pipefd[2];
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child process */
|
||||||
|
int fd, userns_fd;
|
||||||
|
__u64 child_userns_id;
|
||||||
|
uid_t orig_uid = getuid();
|
||||||
|
int setuid_count;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create new user namespace with multiple UIDs mapped (0-9) */
|
||||||
|
userns_fd = get_userns_fd(0, orig_uid, 10);
|
||||||
|
if (userns_fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Get user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Send namespace ID to parent */
|
||||||
|
write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform multiple setuid() calls.
|
||||||
|
* Each setuid() triggers commit_creds() which should properly
|
||||||
|
* swap active references via switch_cred_namespaces().
|
||||||
|
*/
|
||||||
|
for (setuid_count = 0; setuid_count < 50; setuid_count++) {
|
||||||
|
uid_t target_uid = (setuid_count % 10);
|
||||||
|
if (setuid(target_uid) < 0) {
|
||||||
|
if (errno != EPERM) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent process */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get namespace ID from child");
|
||||||
|
}
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
TH_LOG("Child user namespace ID: %llu", (unsigned long long)userns_id);
|
||||||
|
|
||||||
|
/* Verify namespace is active while child is running */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == userns_id) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT_TRUE(found);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
/* Verify namespace becomes inactive after child exits */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
|
||||||
|
found = false;
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == userns_id) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_FALSE(found);
|
||||||
|
TH_LOG("setuid() correctly preserved active references (no leak)");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test setgid() in a user namespace properly handles active references.
|
||||||
|
*/
|
||||||
|
TEST(setgid_preserves_active_refs)
|
||||||
|
{
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
__u64 userns_id;
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[256];
|
||||||
|
ssize_t ret;
|
||||||
|
int i;
|
||||||
|
bool found = false;
|
||||||
|
int pipefd[2];
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child process */
|
||||||
|
int fd, userns_fd;
|
||||||
|
__u64 child_userns_id;
|
||||||
|
uid_t orig_uid = getuid();
|
||||||
|
int setgid_count;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create new user namespace with multiple GIDs mapped */
|
||||||
|
userns_fd = get_userns_fd(0, orig_uid, 10);
|
||||||
|
if (userns_fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Get user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
|
||||||
|
|
||||||
|
/* Perform multiple setgid() calls */
|
||||||
|
for (setgid_count = 0; setgid_count < 50; setgid_count++) {
|
||||||
|
gid_t target_gid = (setgid_count % 10);
|
||||||
|
if (setgid(target_gid) < 0) {
|
||||||
|
if (errno != EPERM) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent process */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get namespace ID from child");
|
||||||
|
}
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
/* Verify namespace becomes inactive */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == userns_id) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_FALSE(found);
|
||||||
|
TH_LOG("setgid() correctly preserved active references (no leak)");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test setresuid() which changes real, effective, and saved UIDs.
|
||||||
|
* This should properly swap active references via commit_creds().
|
||||||
|
*/
|
||||||
|
TEST(setresuid_preserves_active_refs)
|
||||||
|
{
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
__u64 userns_id;
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[256];
|
||||||
|
ssize_t ret;
|
||||||
|
int i;
|
||||||
|
bool found = false;
|
||||||
|
int pipefd[2];
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child process */
|
||||||
|
int fd, userns_fd;
|
||||||
|
__u64 child_userns_id;
|
||||||
|
uid_t orig_uid = getuid();
|
||||||
|
int setres_count;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create new user namespace */
|
||||||
|
userns_fd = get_userns_fd(0, orig_uid, 10);
|
||||||
|
if (userns_fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Get user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
|
||||||
|
|
||||||
|
/* Perform multiple setresuid() calls */
|
||||||
|
for (setres_count = 0; setres_count < 30; setres_count++) {
|
||||||
|
uid_t uid1 = (setres_count % 5);
|
||||||
|
uid_t uid2 = ((setres_count + 1) % 5);
|
||||||
|
uid_t uid3 = ((setres_count + 2) % 5);
|
||||||
|
|
||||||
|
if (setresuid(uid1, uid2, uid3) < 0) {
|
||||||
|
if (errno != EPERM) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent process */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get namespace ID from child");
|
||||||
|
}
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
/* Verify namespace becomes inactive */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == userns_id) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_FALSE(found);
|
||||||
|
TH_LOG("setresuid() correctly preserved active references (no leak)");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test credential changes across multiple user namespaces.
|
||||||
|
* Create nested user namespaces and verify active reference tracking.
|
||||||
|
*/
|
||||||
|
TEST(cred_change_nested_userns)
|
||||||
|
{
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
__u64 parent_userns_id, child_userns_id;
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[256];
|
||||||
|
ssize_t ret;
|
||||||
|
int i;
|
||||||
|
bool found_parent = false, found_child = false;
|
||||||
|
int pipefd[2];
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child process */
|
||||||
|
int fd, userns_fd;
|
||||||
|
__u64 parent_id, child_id;
|
||||||
|
uid_t orig_uid = getuid();
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create first user namespace */
|
||||||
|
userns_fd = get_userns_fd(0, orig_uid, 1);
|
||||||
|
if (userns_fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Get first namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &parent_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Create nested user namespace */
|
||||||
|
userns_fd = get_userns_fd(0, 0, 1);
|
||||||
|
if (userns_fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Get nested namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Send both IDs to parent */
|
||||||
|
write(pipefd[1], &parent_id, sizeof(parent_id));
|
||||||
|
write(pipefd[1], &child_id, sizeof(child_id));
|
||||||
|
|
||||||
|
/* Perform some credential changes in nested namespace */
|
||||||
|
setuid(0);
|
||||||
|
setgid(0);
|
||||||
|
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent process */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
/* Read both namespace IDs */
|
||||||
|
if (read(pipefd[0], &parent_userns_id, sizeof(parent_userns_id)) != sizeof(parent_userns_id)) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get parent namespace ID");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (read(pipefd[0], &child_userns_id, sizeof(child_userns_id)) != sizeof(child_userns_id)) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get child namespace ID");
|
||||||
|
}
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
TH_LOG("Parent userns: %llu, Child userns: %llu",
|
||||||
|
(unsigned long long)parent_userns_id,
|
||||||
|
(unsigned long long)child_userns_id);
|
||||||
|
|
||||||
|
/* Verify both namespaces are active */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == parent_userns_id)
|
||||||
|
found_parent = true;
|
||||||
|
if (ns_ids[i] == child_userns_id)
|
||||||
|
found_child = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_TRUE(found_parent);
|
||||||
|
ASSERT_TRUE(found_child);
|
||||||
|
|
||||||
|
/* Wait for child */
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
/* Verify both namespaces become inactive */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
|
||||||
|
found_parent = false;
|
||||||
|
found_child = false;
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == parent_userns_id)
|
||||||
|
found_parent = true;
|
||||||
|
if (ns_ids[i] == child_userns_id)
|
||||||
|
found_child = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_FALSE(found_parent);
|
||||||
|
ASSERT_FALSE(found_child);
|
||||||
|
TH_LOG("Nested user namespace credential changes preserved active refs (no leak)");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test rapid credential changes don't cause refcount imbalances.
|
||||||
|
* This stress-tests the switch_cred_namespaces() logic.
|
||||||
|
*/
|
||||||
|
TEST(rapid_cred_changes_no_leak)
|
||||||
|
{
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
__u64 userns_id;
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[256];
|
||||||
|
ssize_t ret;
|
||||||
|
int i;
|
||||||
|
bool found = false;
|
||||||
|
int pipefd[2];
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child process */
|
||||||
|
int fd, userns_fd;
|
||||||
|
__u64 child_userns_id;
|
||||||
|
uid_t orig_uid = getuid();
|
||||||
|
int change_count;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create new user namespace with wider range of UIDs/GIDs */
|
||||||
|
userns_fd = get_userns_fd(0, orig_uid, 100);
|
||||||
|
if (userns_fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Get user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform many rapid credential changes.
|
||||||
|
* Mix setuid, setgid, setreuid, setregid, setresuid, setresgid.
|
||||||
|
*/
|
||||||
|
for (change_count = 0; change_count < 200; change_count++) {
|
||||||
|
switch (change_count % 6) {
|
||||||
|
case 0:
|
||||||
|
setuid(change_count % 50);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
setgid(change_count % 50);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
setreuid(change_count % 50, (change_count + 1) % 50);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
setregid(change_count % 50, (change_count + 1) % 50);
|
||||||
|
break;
|
||||||
|
case 4:
|
||||||
|
setresuid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
|
||||||
|
break;
|
||||||
|
case 5:
|
||||||
|
setresgid(change_count % 50, (change_count + 1) % 50, (change_count + 2) % 50);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent process */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get namespace ID from child");
|
||||||
|
}
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
TH_LOG("Testing with user namespace ID: %llu", (unsigned long long)userns_id);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
/* Verify namespace becomes inactive (no leaked active refs) */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == userns_id) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_FALSE(found);
|
||||||
|
TH_LOG("200 rapid credential changes completed with no active ref leak");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test setfsuid/setfsgid which change filesystem UID/GID.
|
||||||
|
* These also trigger credential changes but may have different code paths.
|
||||||
|
*/
|
||||||
|
TEST(setfsuid_preserves_active_refs)
|
||||||
|
{
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
__u64 userns_id;
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[256];
|
||||||
|
ssize_t ret;
|
||||||
|
int i;
|
||||||
|
bool found = false;
|
||||||
|
int pipefd[2];
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child process */
|
||||||
|
int fd, userns_fd;
|
||||||
|
__u64 child_userns_id;
|
||||||
|
uid_t orig_uid = getuid();
|
||||||
|
int change_count;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create new user namespace */
|
||||||
|
userns_fd = get_userns_fd(0, orig_uid, 10);
|
||||||
|
if (userns_fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Get user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
write(pipefd[1], &child_userns_id, sizeof(child_userns_id));
|
||||||
|
|
||||||
|
/* Perform multiple setfsuid/setfsgid calls */
|
||||||
|
for (change_count = 0; change_count < 50; change_count++) {
|
||||||
|
setfsuid(change_count % 10);
|
||||||
|
setfsgid(change_count % 10);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent process */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
if (read(pipefd[0], &userns_id, sizeof(userns_id)) != sizeof(userns_id)) {
|
||||||
|
close(pipefd[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get namespace ID from child");
|
||||||
|
}
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
/* Verify namespace becomes inactive */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == userns_id) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_FALSE(found);
|
||||||
|
TH_LOG("setfsuid/setfsgid correctly preserved active references (no leak)");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
||||||
|
|
@ -0,0 +1,530 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <linux/nsfs.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/mount.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
#include "../filesystems/utils.h"
|
||||||
|
#include "../pidfd/pidfd.h"
|
||||||
|
#include "wrappers.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() error handling with invalid buffer addresses.
|
||||||
|
*
|
||||||
|
* When the buffer pointer is invalid (e.g., crossing page boundaries
|
||||||
|
* into unmapped memory), listns() is expected to fail with EFAULT.
|
||||||
|
*
|
||||||
|
* This test also creates mount namespaces that get destroyed during
|
||||||
|
* iteration, testing that namespace cleanup happens outside the RCU
|
||||||
|
* read lock.
|
||||||
|
*/
|
||||||
|
TEST(listns_partial_fault_with_ns_cleanup)
|
||||||
|
{
|
||||||
|
void *map;
|
||||||
|
__u64 *ns_ids;
|
||||||
|
ssize_t ret;
|
||||||
|
long page_size;
|
||||||
|
pid_t pid, iter_pid;
|
||||||
|
int pidfds[5];
|
||||||
|
int sv[5][2];
|
||||||
|
int iter_pidfd;
|
||||||
|
int i, status;
|
||||||
|
char c;
|
||||||
|
|
||||||
|
page_size = sysconf(_SC_PAGESIZE);
|
||||||
|
ASSERT_GT(page_size, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Map two pages:
|
||||||
|
* - First page: readable and writable
|
||||||
|
* - Second page: will be unmapped to trigger EFAULT
|
||||||
|
*/
|
||||||
|
map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
ASSERT_NE(map, MAP_FAILED);
|
||||||
|
|
||||||
|
/* Unmap the second page */
|
||||||
|
ret = munmap((char *)map + page_size, page_size);
|
||||||
|
ASSERT_EQ(ret, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Position the buffer pointer so there's room for exactly one u64
|
||||||
|
* before the page boundary. The second u64 would fall into the
|
||||||
|
* unmapped page.
|
||||||
|
*/
|
||||||
|
ns_ids = ((__u64 *)((char *)map + page_size)) - 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a separate process to run listns() in a loop concurrently
|
||||||
|
* with namespace creation and destruction.
|
||||||
|
*/
|
||||||
|
iter_pid = create_child(&iter_pidfd, 0);
|
||||||
|
ASSERT_NE(iter_pid, -1);
|
||||||
|
|
||||||
|
if (iter_pid == 0) {
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0, /* All types */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0, /* Global listing */
|
||||||
|
};
|
||||||
|
int iter_ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Loop calling listns() until killed.
|
||||||
|
* The kernel should:
|
||||||
|
* 1. Successfully write the first namespace ID (within valid page)
|
||||||
|
* 2. Fail with EFAULT when trying to write the second ID (unmapped page)
|
||||||
|
* 3. Handle concurrent namespace destruction without deadlock
|
||||||
|
*/
|
||||||
|
while (1) {
|
||||||
|
iter_ret = sys_listns(&req, ns_ids, 2, 0);
|
||||||
|
|
||||||
|
if (iter_ret == -1 && errno == ENOSYS)
|
||||||
|
_exit(PIDFD_SKIP);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Small delay to let iterator start looping */
|
||||||
|
usleep(50000);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create several child processes, each in its own mount namespace.
|
||||||
|
* These will be destroyed while the iterator is running listns().
|
||||||
|
*/
|
||||||
|
for (i = 0; i < 5; i++) {
|
||||||
|
/* Create socketpair for synchronization */
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
|
||||||
|
|
||||||
|
pid = create_child(&pidfds[i], CLONE_NEWNS);
|
||||||
|
ASSERT_NE(pid, -1);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
close(sv[i][0]); /* Close parent end */
|
||||||
|
|
||||||
|
if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Child: create a couple of tmpfs mounts */
|
||||||
|
if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
|
||||||
|
_exit(1);
|
||||||
|
if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
|
||||||
|
_exit(1);
|
||||||
|
if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Signal parent that setup is complete */
|
||||||
|
if (write_nointr(sv[i][1], "R", 1) != 1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Wait for parent to signal us to exit */
|
||||||
|
if (read_nointr(sv[i][1], &c, 1) != 1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
close(sv[i][1]);
|
||||||
|
_exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[i][1]); /* Close child end */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for all children to finish setup */
|
||||||
|
for (i = 0; i < 5; i++) {
|
||||||
|
ret = read_nointr(sv[i][0], &c, 1);
|
||||||
|
ASSERT_EQ(ret, 1);
|
||||||
|
ASSERT_EQ(c, 'R');
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Signal children to exit. This will destroy their mount namespaces
|
||||||
|
* while listns() is iterating the namespace tree.
|
||||||
|
* This tests that cleanup happens outside the RCU read lock.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < 5; i++)
|
||||||
|
write_nointr(sv[i][0], "X", 1);
|
||||||
|
|
||||||
|
/* Wait for all mount namespace children to exit and cleanup */
|
||||||
|
for (i = 0; i < 5; i++) {
|
||||||
|
waitpid(-1, NULL, 0);
|
||||||
|
close(sv[i][0]);
|
||||||
|
close(pidfds[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Kill iterator and wait for it */
|
||||||
|
sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
|
||||||
|
ret = waitpid(iter_pid, &status, 0);
|
||||||
|
ASSERT_EQ(ret, iter_pid);
|
||||||
|
close(iter_pidfd);
|
||||||
|
|
||||||
|
/* Should have been killed */
|
||||||
|
ASSERT_TRUE(WIFSIGNALED(status));
|
||||||
|
ASSERT_EQ(WTERMSIG(status), SIGKILL);
|
||||||
|
|
||||||
|
/* Clean up */
|
||||||
|
munmap(map, page_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() error handling when the entire buffer is invalid.
|
||||||
|
* This is a sanity check that basic invalid pointer detection works.
|
||||||
|
*/
|
||||||
|
TEST(listns_complete_fault)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 *ns_ids;
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
/* Use a clearly invalid pointer */
|
||||||
|
ns_ids = (__u64 *)0xdeadbeef;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, ns_ids, 10, 0);
|
||||||
|
|
||||||
|
if (ret == -1 && errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
|
||||||
|
/* Should fail with EFAULT */
|
||||||
|
ASSERT_EQ(ret, -1);
|
||||||
|
ASSERT_EQ(errno, EFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() error handling when the buffer is NULL.
|
||||||
|
*/
|
||||||
|
TEST(listns_null_buffer)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
/* NULL buffer with non-zero count should fail */
|
||||||
|
ret = sys_listns(&req, NULL, 10, 0);
|
||||||
|
|
||||||
|
if (ret == -1 && errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
|
||||||
|
/* Should fail with EFAULT */
|
||||||
|
ASSERT_EQ(ret, -1);
|
||||||
|
ASSERT_EQ(errno, EFAULT);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() with a buffer that becomes invalid mid-iteration
|
||||||
|
* (after several successful writes), combined with mount namespace
|
||||||
|
* destruction to test RCU cleanup logic.
|
||||||
|
*/
|
||||||
|
TEST(listns_late_fault_with_ns_cleanup)
|
||||||
|
{
|
||||||
|
void *map;
|
||||||
|
__u64 *ns_ids;
|
||||||
|
ssize_t ret;
|
||||||
|
long page_size;
|
||||||
|
pid_t pid, iter_pid;
|
||||||
|
int pidfds[10];
|
||||||
|
int sv[10][2];
|
||||||
|
int iter_pidfd;
|
||||||
|
int i, status;
|
||||||
|
char c;
|
||||||
|
|
||||||
|
page_size = sysconf(_SC_PAGESIZE);
|
||||||
|
ASSERT_GT(page_size, 0);
|
||||||
|
|
||||||
|
/* Map two pages */
|
||||||
|
map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
ASSERT_NE(map, MAP_FAILED);
|
||||||
|
|
||||||
|
/* Unmap the second page */
|
||||||
|
ret = munmap((char *)map + page_size, page_size);
|
||||||
|
ASSERT_EQ(ret, 0);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Position buffer so we can write several u64s successfully
|
||||||
|
* before hitting the page boundary.
|
||||||
|
*/
|
||||||
|
ns_ids = ((__u64 *)((char *)map + page_size)) - 5;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a separate process to run listns() concurrently.
|
||||||
|
*/
|
||||||
|
iter_pid = create_child(&iter_pidfd, 0);
|
||||||
|
ASSERT_NE(iter_pid, -1);
|
||||||
|
|
||||||
|
if (iter_pid == 0) {
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
int iter_ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Loop calling listns() until killed.
|
||||||
|
* Request 10 namespace IDs while namespaces are being destroyed.
|
||||||
|
* This tests:
|
||||||
|
* 1. EFAULT handling when buffer becomes invalid
|
||||||
|
* 2. Namespace cleanup outside RCU read lock during iteration
|
||||||
|
*/
|
||||||
|
while (1) {
|
||||||
|
iter_ret = sys_listns(&req, ns_ids, 10, 0);
|
||||||
|
|
||||||
|
if (iter_ret == -1 && errno == ENOSYS)
|
||||||
|
_exit(PIDFD_SKIP);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Small delay to let iterator start looping */
|
||||||
|
usleep(50000);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create more children with mount namespaces to increase the
|
||||||
|
* likelihood that namespace cleanup happens during iteration.
|
||||||
|
*/
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
/* Create socketpair for synchronization */
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
|
||||||
|
|
||||||
|
pid = create_child(&pidfds[i], CLONE_NEWNS);
|
||||||
|
ASSERT_NE(pid, -1);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
close(sv[i][0]); /* Close parent end */
|
||||||
|
|
||||||
|
if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Child: create tmpfs mounts */
|
||||||
|
if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
|
||||||
|
_exit(1);
|
||||||
|
if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
|
||||||
|
_exit(1);
|
||||||
|
if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Signal parent that setup is complete */
|
||||||
|
if (write_nointr(sv[i][1], "R", 1) != 1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Wait for parent to signal us to exit */
|
||||||
|
if (read_nointr(sv[i][1], &c, 1) != 1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
close(sv[i][1]);
|
||||||
|
_exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[i][1]); /* Close child end */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for all children to finish setup */
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
ret = read_nointr(sv[i][0], &c, 1);
|
||||||
|
ASSERT_EQ(ret, 1);
|
||||||
|
ASSERT_EQ(c, 'R');
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Kill half the children */
|
||||||
|
for (i = 0; i < 5; i++)
|
||||||
|
write_nointr(sv[i][0], "X", 1);
|
||||||
|
|
||||||
|
/* Small delay to let some exit */
|
||||||
|
usleep(10000);
|
||||||
|
|
||||||
|
/* Kill remaining children */
|
||||||
|
for (i = 5; i < 10; i++)
|
||||||
|
write_nointr(sv[i][0], "X", 1);
|
||||||
|
|
||||||
|
/* Wait for all children and cleanup */
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
waitpid(-1, NULL, 0);
|
||||||
|
close(sv[i][0]);
|
||||||
|
close(pidfds[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Kill iterator and wait for it */
|
||||||
|
sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
|
||||||
|
ret = waitpid(iter_pid, &status, 0);
|
||||||
|
ASSERT_EQ(ret, iter_pid);
|
||||||
|
close(iter_pidfd);
|
||||||
|
|
||||||
|
/* Should have been killed */
|
||||||
|
ASSERT_TRUE(WIFSIGNALED(status));
|
||||||
|
ASSERT_EQ(WTERMSIG(status), SIGKILL);
|
||||||
|
|
||||||
|
/* Clean up */
|
||||||
|
munmap(map, page_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test specifically focused on mount namespace cleanup during EFAULT.
|
||||||
|
* Filter for mount namespaces only.
|
||||||
|
*/
|
||||||
|
TEST(listns_mnt_ns_cleanup_on_fault)
|
||||||
|
{
|
||||||
|
void *map;
|
||||||
|
__u64 *ns_ids;
|
||||||
|
ssize_t ret;
|
||||||
|
long page_size;
|
||||||
|
pid_t pid, iter_pid;
|
||||||
|
int pidfds[8];
|
||||||
|
int sv[8][2];
|
||||||
|
int iter_pidfd;
|
||||||
|
int i, status;
|
||||||
|
char c;
|
||||||
|
|
||||||
|
page_size = sysconf(_SC_PAGESIZE);
|
||||||
|
ASSERT_GT(page_size, 0);
|
||||||
|
|
||||||
|
/* Set up partial fault buffer */
|
||||||
|
map = mmap(NULL, page_size * 2, PROT_READ | PROT_WRITE,
|
||||||
|
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
|
||||||
|
ASSERT_NE(map, MAP_FAILED);
|
||||||
|
|
||||||
|
ret = munmap((char *)map + page_size, page_size);
|
||||||
|
ASSERT_EQ(ret, 0);
|
||||||
|
|
||||||
|
/* Position for 3 successful writes, then fault */
|
||||||
|
ns_ids = ((__u64 *)((char *)map + page_size)) - 3;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Create a separate process to run listns() concurrently.
|
||||||
|
*/
|
||||||
|
iter_pid = create_child(&iter_pidfd, 0);
|
||||||
|
ASSERT_NE(iter_pid, -1);
|
||||||
|
|
||||||
|
if (iter_pid == 0) {
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWNS, /* Only mount namespaces */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
int iter_ret;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Loop calling listns() until killed.
|
||||||
|
* Call listns() to race with namespace destruction.
|
||||||
|
*/
|
||||||
|
while (1) {
|
||||||
|
iter_ret = sys_listns(&req, ns_ids, 10, 0);
|
||||||
|
|
||||||
|
if (iter_ret == -1 && errno == ENOSYS)
|
||||||
|
_exit(PIDFD_SKIP);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Small delay to let iterator start looping */
|
||||||
|
usleep(50000);
|
||||||
|
|
||||||
|
/* Create children with mount namespaces */
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
/* Create socketpair for synchronization */
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv[i]), 0);
|
||||||
|
|
||||||
|
pid = create_child(&pidfds[i], CLONE_NEWNS);
|
||||||
|
ASSERT_NE(pid, -1);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
close(sv[i][0]); /* Close parent end */
|
||||||
|
|
||||||
|
if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Do some mount operations to make cleanup more interesting */
|
||||||
|
if (mkdir("/tmp/test_mnt1", 0755) == -1 && errno != EEXIST)
|
||||||
|
_exit(1);
|
||||||
|
if (mkdir("/tmp/test_mnt2", 0755) == -1 && errno != EEXIST)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
if (mount("tmpfs", "/tmp/test_mnt1", "tmpfs", 0, NULL) == -1)
|
||||||
|
_exit(1);
|
||||||
|
if (mount("tmpfs", "/tmp/test_mnt2", "tmpfs", 0, NULL) == -1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Signal parent that setup is complete */
|
||||||
|
if (write_nointr(sv[i][1], "R", 1) != 1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
/* Wait for parent to signal us to exit */
|
||||||
|
if (read_nointr(sv[i][1], &c, 1) != 1)
|
||||||
|
_exit(1);
|
||||||
|
|
||||||
|
close(sv[i][1]);
|
||||||
|
_exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[i][1]); /* Close child end */
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for all children to finish setup */
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
ret = read_nointr(sv[i][0], &c, 1);
|
||||||
|
ASSERT_EQ(ret, 1);
|
||||||
|
ASSERT_EQ(c, 'R');
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Kill children to trigger namespace destruction during iteration */
|
||||||
|
for (i = 0; i < 8; i++)
|
||||||
|
write_nointr(sv[i][0], "X", 1);
|
||||||
|
|
||||||
|
/* Wait for children and cleanup */
|
||||||
|
for (i = 0; i < 8; i++) {
|
||||||
|
waitpid(-1, NULL, 0);
|
||||||
|
close(sv[i][0]);
|
||||||
|
close(pidfds[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Kill iterator and wait for it */
|
||||||
|
sys_pidfd_send_signal(iter_pidfd, SIGKILL, NULL, 0);
|
||||||
|
ret = waitpid(iter_pid, &status, 0);
|
||||||
|
ASSERT_EQ(ret, iter_pid);
|
||||||
|
close(iter_pidfd);
|
||||||
|
|
||||||
|
/* Should have been killed */
|
||||||
|
ASSERT_TRUE(WIFSIGNALED(status));
|
||||||
|
ASSERT_EQ(WTERMSIG(status), SIGKILL);
|
||||||
|
|
||||||
|
munmap(map, page_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
||||||
|
|
@ -0,0 +1,138 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
#include "../filesystems/utils.h"
|
||||||
|
#include "wrappers.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Minimal test case to reproduce KASAN out-of-bounds in listns pagination.
|
||||||
|
*
|
||||||
|
* The bug occurs when:
|
||||||
|
* 1. Filtering by a specific namespace type (e.g., CLONE_NEWUSER)
|
||||||
|
* 2. Using pagination (req.ns_id != 0)
|
||||||
|
* 3. The lookup_ns_id_at() call in do_listns() passes ns_type=0 instead of
|
||||||
|
* the filtered type, causing it to search the unified tree and potentially
|
||||||
|
* return a namespace of the wrong type.
|
||||||
|
*/
|
||||||
|
TEST(pagination_with_type_filter)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER, /* Filter by user namespace */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
pid_t pids[10];
|
||||||
|
int num_children = 10;
|
||||||
|
int i;
|
||||||
|
int sv[2];
|
||||||
|
__u64 first_batch[3];
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
||||||
|
|
||||||
|
/* Create children with user namespaces */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
pids[i] = fork();
|
||||||
|
ASSERT_GE(pids[i], 0);
|
||||||
|
|
||||||
|
if (pids[i] == 0) {
|
||||||
|
char c;
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signal parent we're ready */
|
||||||
|
if (write(sv[1], &c, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for parent signal to exit */
|
||||||
|
if (read(sv[1], &c, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[1]);
|
||||||
|
|
||||||
|
/* Wait for all children to signal ready */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
char c;
|
||||||
|
if (read(sv[0], &c, 1) != 1) {
|
||||||
|
close(sv[0]);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
kill(pids[j], SIGKILL);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
waitpid(pids[j], NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* First batch - this should work */
|
||||||
|
ret = sys_listns(&req, first_batch, 3, 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS) {
|
||||||
|
close(sv[0]);
|
||||||
|
for (i = 0; i < num_children; i++)
|
||||||
|
kill(pids[i], SIGKILL);
|
||||||
|
for (i = 0; i < num_children; i++)
|
||||||
|
waitpid(pids[i], NULL, 0);
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("First batch returned %zd entries", ret);
|
||||||
|
|
||||||
|
if (ret == 3) {
|
||||||
|
__u64 second_batch[3];
|
||||||
|
|
||||||
|
/* Second batch - pagination triggers the bug */
|
||||||
|
req.ns_id = first_batch[2]; /* Continue from last ID */
|
||||||
|
ret = sys_listns(&req, second_batch, 3, 0);
|
||||||
|
|
||||||
|
TH_LOG("Second batch returned %zd entries", ret);
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signal all children to exit */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
char c = 'X';
|
||||||
|
if (write(sv[0], &c, 1) != 1) {
|
||||||
|
close(sv[0]);
|
||||||
|
for (int j = i; j < num_children; j++)
|
||||||
|
kill(pids[j], SIGKILL);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
waitpid(pids[j], NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
/* Cleanup */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
int status;
|
||||||
|
waitpid(pids[i], &status, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
||||||
|
|
@ -0,0 +1,759 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <linux/nsfs.h>
|
||||||
|
#include <sys/capability.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/prctl.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
#include "../filesystems/utils.h"
|
||||||
|
#include "wrappers.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that unprivileged users can only see namespaces they're currently in.
|
||||||
|
* Create a namespace, drop privileges, verify we can only see our own namespaces.
|
||||||
|
*/
|
||||||
|
TEST(listns_unprivileged_current_only)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWNET,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
bool found_ours;
|
||||||
|
int unexpected_count;
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int fd;
|
||||||
|
__u64 our_netns_id;
|
||||||
|
bool found_ours;
|
||||||
|
int unexpected_count;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create user namespace to be unprivileged */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create a network namespace */
|
||||||
|
if (unshare(CLONE_NEWNET) < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get our network namespace ID */
|
||||||
|
fd = open("/proc/self/ns/net", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &our_netns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Now we're unprivileged - list all network namespaces */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We should only see our own network namespace */
|
||||||
|
found_ours = false;
|
||||||
|
unexpected_count = 0;
|
||||||
|
|
||||||
|
for (ssize_t i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == our_netns_id) {
|
||||||
|
found_ours = true;
|
||||||
|
} else {
|
||||||
|
/* This is either init_net (which we can see) or unexpected */
|
||||||
|
unexpected_count++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Send results to parent */
|
||||||
|
write(pipefd[1], &found_ours, sizeof(found_ours));
|
||||||
|
write(pipefd[1], &unexpected_count, sizeof(unexpected_count));
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
found_ours = false;
|
||||||
|
unexpected_count = 0;
|
||||||
|
read(pipefd[0], &found_ours, sizeof(found_ours));
|
||||||
|
read(pipefd[0], &unexpected_count, sizeof(unexpected_count));
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
/* Child should have seen its own namespace */
|
||||||
|
ASSERT_TRUE(found_ours);
|
||||||
|
|
||||||
|
TH_LOG("Unprivileged child saw its own namespace, plus %d others (likely init_net)",
|
||||||
|
unexpected_count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that users with CAP_SYS_ADMIN in a user namespace can see
|
||||||
|
* all namespaces owned by that user namespace.
|
||||||
|
*/
|
||||||
|
TEST(listns_cap_sys_admin_in_userns)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0, /* All types */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0, /* Will be set to our created user namespace */
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
bool success;
|
||||||
|
ssize_t count;
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int fd;
|
||||||
|
__u64 userns_id;
|
||||||
|
ssize_t ret;
|
||||||
|
int min_expected;
|
||||||
|
bool success;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create user namespace - we'll have CAP_SYS_ADMIN in it */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get the user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Create several namespaces owned by this user namespace */
|
||||||
|
unshare(CLONE_NEWNET);
|
||||||
|
unshare(CLONE_NEWUTS);
|
||||||
|
unshare(CLONE_NEWIPC);
|
||||||
|
|
||||||
|
/* List namespaces owned by our user namespace */
|
||||||
|
req.user_ns_id = userns_id;
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We have CAP_SYS_ADMIN in this user namespace,
|
||||||
|
* so we should see all namespaces owned by it.
|
||||||
|
* That includes: net, uts, ipc, and the user namespace itself.
|
||||||
|
*/
|
||||||
|
min_expected = 4;
|
||||||
|
success = (ret >= min_expected);
|
||||||
|
|
||||||
|
write(pipefd[1], &success, sizeof(success));
|
||||||
|
write(pipefd[1], &ret, sizeof(ret));
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
count = 0;
|
||||||
|
read(pipefd[0], &success, sizeof(success));
|
||||||
|
read(pipefd[0], &count, sizeof(count));
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
ASSERT_TRUE(success);
|
||||||
|
TH_LOG("User with CAP_SYS_ADMIN saw %zd namespaces owned by their user namespace",
|
||||||
|
count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that users cannot see namespaces from unrelated user namespaces.
|
||||||
|
* Create two sibling user namespaces, verify they can't see each other's
|
||||||
|
* owned namespaces.
|
||||||
|
*/
|
||||||
|
TEST(listns_cannot_see_sibling_userns_namespaces)
|
||||||
|
{
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid1, pid2;
|
||||||
|
int status;
|
||||||
|
__u64 netns_a_id;
|
||||||
|
int pipefd2[2];
|
||||||
|
bool found_sibling_netns;
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
/* Fork first child - creates user namespace A */
|
||||||
|
pid1 = fork();
|
||||||
|
ASSERT_GE(pid1, 0);
|
||||||
|
|
||||||
|
if (pid1 == 0) {
|
||||||
|
int fd;
|
||||||
|
__u64 netns_a_id;
|
||||||
|
char buf;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create user namespace A */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create network namespace owned by user namespace A */
|
||||||
|
if (unshare(CLONE_NEWNET) < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get network namespace ID */
|
||||||
|
fd = open("/proc/self/ns/net", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &netns_a_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Send namespace ID to parent */
|
||||||
|
write(pipefd[1], &netns_a_id, sizeof(netns_a_id));
|
||||||
|
|
||||||
|
/* Keep alive for sibling to check */
|
||||||
|
read(pipefd[1], &buf, 1);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent reads namespace A ID */
|
||||||
|
close(pipefd[1]);
|
||||||
|
netns_a_id = 0;
|
||||||
|
read(pipefd[0], &netns_a_id, sizeof(netns_a_id));
|
||||||
|
|
||||||
|
TH_LOG("User namespace A created network namespace with ID %llu",
|
||||||
|
(unsigned long long)netns_a_id);
|
||||||
|
|
||||||
|
/* Fork second child - creates user namespace B */
|
||||||
|
ASSERT_EQ(pipe(pipefd2), 0);
|
||||||
|
|
||||||
|
pid2 = fork();
|
||||||
|
ASSERT_GE(pid2, 0);
|
||||||
|
|
||||||
|
if (pid2 == 0) {
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWNET,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
bool found_sibling_netns;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
close(pipefd2[0]);
|
||||||
|
|
||||||
|
/* Create user namespace B (sibling to A) */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd2[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to list all network namespaces */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
|
||||||
|
found_sibling_netns = false;
|
||||||
|
if (ret > 0) {
|
||||||
|
for (ssize_t i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == netns_a_id) {
|
||||||
|
found_sibling_netns = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We should NOT see the sibling's network namespace */
|
||||||
|
write(pipefd2[1], &found_sibling_netns, sizeof(found_sibling_netns));
|
||||||
|
close(pipefd2[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent reads result from second child */
|
||||||
|
close(pipefd2[1]);
|
||||||
|
found_sibling_netns = false;
|
||||||
|
read(pipefd2[0], &found_sibling_netns, sizeof(found_sibling_netns));
|
||||||
|
close(pipefd2[0]);
|
||||||
|
|
||||||
|
/* Signal first child to exit */
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Wait for both children */
|
||||||
|
waitpid(pid2, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
|
||||||
|
waitpid(pid1, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
|
||||||
|
/* Second child should NOT have seen first child's namespace */
|
||||||
|
ASSERT_FALSE(found_sibling_netns);
|
||||||
|
TH_LOG("User namespace B correctly could not see sibling namespace A's network namespace");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test permission checking with LISTNS_CURRENT_USER.
|
||||||
|
* Verify that listing with LISTNS_CURRENT_USER respects permissions.
|
||||||
|
*/
|
||||||
|
TEST(listns_current_user_permissions)
|
||||||
|
{
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
bool success;
|
||||||
|
ssize_t count;
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = LISTNS_CURRENT_USER,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
bool success;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create user namespace */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create some namespaces owned by this user namespace */
|
||||||
|
if (unshare(CLONE_NEWNET) < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unshare(CLONE_NEWUTS) < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* List with LISTNS_CURRENT_USER - should see our owned namespaces */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
|
||||||
|
success = (ret >= 3); /* At least user, net, uts */
|
||||||
|
write(pipefd[1], &success, sizeof(success));
|
||||||
|
write(pipefd[1], &ret, sizeof(ret));
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
success = false;
|
||||||
|
count = 0;
|
||||||
|
read(pipefd[0], &success, sizeof(success));
|
||||||
|
read(pipefd[0], &count, sizeof(count));
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
ASSERT_TRUE(success);
|
||||||
|
TH_LOG("LISTNS_CURRENT_USER returned %zd namespaces", count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that CAP_SYS_ADMIN in parent user namespace allows seeing
|
||||||
|
* child user namespace's owned namespaces.
|
||||||
|
*/
|
||||||
|
TEST(listns_parent_userns_cap_sys_admin)
|
||||||
|
{
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
bool found_child_userns;
|
||||||
|
ssize_t count;
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int fd;
|
||||||
|
__u64 parent_userns_id;
|
||||||
|
__u64 child_userns_id;
|
||||||
|
struct ns_id_req req;
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
bool found_child_userns;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create parent user namespace - we have CAP_SYS_ADMIN in it */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get parent user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &parent_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Create child user namespace */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get child user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Create namespaces owned by child user namespace */
|
||||||
|
if (unshare(CLONE_NEWNET) < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* List namespaces owned by parent user namespace */
|
||||||
|
req.size = sizeof(req);
|
||||||
|
req.spare = 0;
|
||||||
|
req.ns_id = 0;
|
||||||
|
req.ns_type = 0;
|
||||||
|
req.spare2 = 0;
|
||||||
|
req.user_ns_id = parent_userns_id;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
|
||||||
|
/* Should see child user namespace in the list */
|
||||||
|
found_child_userns = false;
|
||||||
|
if (ret > 0) {
|
||||||
|
for (ssize_t i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == child_userns_id) {
|
||||||
|
found_child_userns = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write(pipefd[1], &found_child_userns, sizeof(found_child_userns));
|
||||||
|
write(pipefd[1], &ret, sizeof(ret));
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
found_child_userns = false;
|
||||||
|
count = 0;
|
||||||
|
read(pipefd[0], &found_child_userns, sizeof(found_child_userns));
|
||||||
|
read(pipefd[0], &count, sizeof(count));
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
ASSERT_TRUE(found_child_userns);
|
||||||
|
TH_LOG("Process with CAP_SYS_ADMIN in parent user namespace saw child user namespace (total: %zd)",
|
||||||
|
count);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that we can see user namespaces we have CAP_SYS_ADMIN inside of.
|
||||||
|
* This is different from seeing namespaces owned by a user namespace.
|
||||||
|
*/
|
||||||
|
TEST(listns_cap_sys_admin_inside_userns)
|
||||||
|
{
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
bool found_ours;
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int fd;
|
||||||
|
__u64 our_userns_id;
|
||||||
|
struct ns_id_req req;
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
bool found_ours;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create user namespace - we have CAP_SYS_ADMIN inside it */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get our user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &our_userns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* List all user namespaces globally */
|
||||||
|
req.size = sizeof(req);
|
||||||
|
req.spare = 0;
|
||||||
|
req.ns_id = 0;
|
||||||
|
req.ns_type = CLONE_NEWUSER;
|
||||||
|
req.spare2 = 0;
|
||||||
|
req.user_ns_id = 0;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
|
||||||
|
/* We should be able to see our own user namespace */
|
||||||
|
found_ours = false;
|
||||||
|
if (ret > 0) {
|
||||||
|
for (ssize_t i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == our_userns_id) {
|
||||||
|
found_ours = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
write(pipefd[1], &found_ours, sizeof(found_ours));
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
found_ours = false;
|
||||||
|
read(pipefd[0], &found_ours, sizeof(found_ours));
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
ASSERT_TRUE(found_ours);
|
||||||
|
TH_LOG("Process can see user namespace it has CAP_SYS_ADMIN inside of");
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that dropping CAP_SYS_ADMIN restricts what we can see.
|
||||||
|
*/
|
||||||
|
TEST(listns_drop_cap_sys_admin)
|
||||||
|
{
|
||||||
|
cap_t caps;
|
||||||
|
cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
|
||||||
|
|
||||||
|
/* This test needs to start with CAP_SYS_ADMIN */
|
||||||
|
caps = cap_get_proc();
|
||||||
|
if (!caps) {
|
||||||
|
SKIP(return, "Cannot get capabilities");
|
||||||
|
}
|
||||||
|
|
||||||
|
cap_flag_value_t cap_val;
|
||||||
|
if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &cap_val) < 0) {
|
||||||
|
cap_free(caps);
|
||||||
|
SKIP(return, "Cannot check CAP_SYS_ADMIN");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (cap_val != CAP_SET) {
|
||||||
|
cap_free(caps);
|
||||||
|
SKIP(return, "Test needs CAP_SYS_ADMIN to start");
|
||||||
|
}
|
||||||
|
cap_free(caps);
|
||||||
|
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
bool correct;
|
||||||
|
ssize_t count_before, count_after;
|
||||||
|
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWNET,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = LISTNS_CURRENT_USER,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[100];
|
||||||
|
ssize_t count_before;
|
||||||
|
__u64 ns_ids_after[100];
|
||||||
|
ssize_t count_after;
|
||||||
|
bool correct;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create user namespace */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Count namespaces with CAP_SYS_ADMIN */
|
||||||
|
count_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
|
||||||
|
/* Drop CAP_SYS_ADMIN */
|
||||||
|
caps = cap_get_proc();
|
||||||
|
if (caps) {
|
||||||
|
cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR);
|
||||||
|
cap_set_flag(caps, CAP_PERMITTED, 1, cap_list, CAP_CLEAR);
|
||||||
|
cap_set_proc(caps);
|
||||||
|
cap_free(caps);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Ensure we can't regain the capability */
|
||||||
|
prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
|
||||||
|
|
||||||
|
/* Count namespaces without CAP_SYS_ADMIN */
|
||||||
|
count_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
|
||||||
|
/* Without CAP_SYS_ADMIN, we should see same or fewer namespaces */
|
||||||
|
correct = (count_after <= count_before);
|
||||||
|
|
||||||
|
write(pipefd[1], &correct, sizeof(correct));
|
||||||
|
write(pipefd[1], &count_before, sizeof(count_before));
|
||||||
|
write(pipefd[1], &count_after, sizeof(count_after));
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
correct = false;
|
||||||
|
count_before = 0;
|
||||||
|
count_after = 0;
|
||||||
|
read(pipefd[0], &correct, sizeof(correct));
|
||||||
|
read(pipefd[0], &count_before, sizeof(count_before));
|
||||||
|
read(pipefd[0], &count_after, sizeof(count_after));
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
|
||||||
|
ASSERT_TRUE(correct);
|
||||||
|
TH_LOG("With CAP_SYS_ADMIN: %zd namespaces, without: %zd namespaces",
|
||||||
|
count_before, count_after);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
||||||
|
|
@ -0,0 +1,679 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <linux/nsfs.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
#include "../filesystems/utils.h"
|
||||||
|
#include "wrappers.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test basic listns() functionality with the unified namespace tree.
|
||||||
|
* List all active namespaces globally.
|
||||||
|
*/
|
||||||
|
TEST(listns_basic_unified)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0, /* All types */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0, /* Global listing */
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Should find at least the initial namespaces */
|
||||||
|
ASSERT_GT(ret, 0);
|
||||||
|
TH_LOG("Found %zd active namespaces", ret);
|
||||||
|
|
||||||
|
/* Verify all returned IDs are non-zero */
|
||||||
|
for (ssize_t i = 0; i < ret; i++) {
|
||||||
|
ASSERT_NE(ns_ids[i], 0);
|
||||||
|
TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() with type filtering.
|
||||||
|
* List only network namespaces.
|
||||||
|
*/
|
||||||
|
TEST(listns_filter_by_type)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWNET, /* Only network namespaces */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
|
||||||
|
/* Should find at least init_net */
|
||||||
|
ASSERT_GT(ret, 0);
|
||||||
|
TH_LOG("Found %zd active network namespaces", ret);
|
||||||
|
|
||||||
|
/* Verify we can open each namespace and it's actually a network namespace */
|
||||||
|
for (ssize_t i = 0; i < ret && i < 5; i++) {
|
||||||
|
struct nsfs_file_handle nsfh = {
|
||||||
|
.ns_id = ns_ids[i],
|
||||||
|
.ns_type = CLONE_NEWNET,
|
||||||
|
.ns_inum = 0,
|
||||||
|
};
|
||||||
|
struct file_handle *fh;
|
||||||
|
int fd;
|
||||||
|
|
||||||
|
fh = (struct file_handle *)malloc(sizeof(*fh) + sizeof(nsfh));
|
||||||
|
ASSERT_NE(fh, NULL);
|
||||||
|
fh->handle_bytes = sizeof(nsfh);
|
||||||
|
fh->handle_type = 0;
|
||||||
|
memcpy(fh->f_handle, &nsfh, sizeof(nsfh));
|
||||||
|
|
||||||
|
fd = open_by_handle_at(-10003, fh, O_RDONLY);
|
||||||
|
free(fh);
|
||||||
|
|
||||||
|
if (fd >= 0) {
|
||||||
|
int ns_type;
|
||||||
|
/* Verify it's a network namespace via ioctl */
|
||||||
|
ns_type = ioctl(fd, NS_GET_NSTYPE);
|
||||||
|
if (ns_type >= 0) {
|
||||||
|
ASSERT_EQ(ns_type, CLONE_NEWNET);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() pagination.
|
||||||
|
* List namespaces in batches.
|
||||||
|
*/
|
||||||
|
TEST(listns_pagination)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 batch1[2], batch2[2];
|
||||||
|
ssize_t ret1, ret2;
|
||||||
|
|
||||||
|
/* Get first batch */
|
||||||
|
ret1 = sys_listns(&req, batch1, ARRAY_SIZE(batch1), 0);
|
||||||
|
if (ret1 < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret1, 0);
|
||||||
|
|
||||||
|
if (ret1 == 0)
|
||||||
|
SKIP(return, "No namespaces found");
|
||||||
|
|
||||||
|
TH_LOG("First batch: %zd namespaces", ret1);
|
||||||
|
|
||||||
|
/* Get second batch using last ID from first batch */
|
||||||
|
if (ret1 == ARRAY_SIZE(batch1)) {
|
||||||
|
req.ns_id = batch1[ret1 - 1];
|
||||||
|
ret2 = sys_listns(&req, batch2, ARRAY_SIZE(batch2), 0);
|
||||||
|
ASSERT_GE(ret2, 0);
|
||||||
|
|
||||||
|
TH_LOG("Second batch: %zd namespaces (after ns_id=%llu)",
|
||||||
|
ret2, (unsigned long long)req.ns_id);
|
||||||
|
|
||||||
|
/* If we got more results, verify IDs are monotonically increasing */
|
||||||
|
if (ret2 > 0) {
|
||||||
|
ASSERT_GT(batch2[0], batch1[ret1 - 1]);
|
||||||
|
TH_LOG("Pagination working: %llu > %llu",
|
||||||
|
(unsigned long long)batch2[0],
|
||||||
|
(unsigned long long)batch1[ret1 - 1]);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
TH_LOG("All namespaces fit in first batch");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() with LISTNS_CURRENT_USER.
|
||||||
|
* List namespaces owned by current user namespace.
|
||||||
|
*/
|
||||||
|
TEST(listns_current_user)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = LISTNS_CURRENT_USER,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
|
||||||
|
/* Should find at least the initial namespaces if we're in init_user_ns */
|
||||||
|
TH_LOG("Found %zd namespaces owned by current user namespace", ret);
|
||||||
|
|
||||||
|
for (ssize_t i = 0; i < ret; i++)
|
||||||
|
TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that listns() only returns active namespaces.
|
||||||
|
* Create a namespace, let it become inactive, verify it's not listed.
|
||||||
|
*/
|
||||||
|
TEST(listns_only_active)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWNET,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[100], ns_ids_after[100];
|
||||||
|
ssize_t ret_before, ret_after;
|
||||||
|
int pipefd[2];
|
||||||
|
pid_t pid;
|
||||||
|
__u64 new_ns_id = 0;
|
||||||
|
int status;
|
||||||
|
|
||||||
|
/* Get initial list */
|
||||||
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
if (ret_before < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret_before, 0);
|
||||||
|
|
||||||
|
TH_LOG("Before: %zd active network namespaces", ret_before);
|
||||||
|
|
||||||
|
/* Create a new namespace in a child process and get its ID */
|
||||||
|
ASSERT_EQ(pipe(pipefd), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int fd;
|
||||||
|
__u64 ns_id;
|
||||||
|
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
/* Create new network namespace */
|
||||||
|
if (unshare(CLONE_NEWNET) < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get its ID */
|
||||||
|
fd = open("/proc/self/ns/net", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(pipefd[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Send ID to parent */
|
||||||
|
write(pipefd[1], &ns_id, sizeof(ns_id));
|
||||||
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
/* Keep namespace active briefly */
|
||||||
|
usleep(100000);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent reads the new namespace ID */
|
||||||
|
{
|
||||||
|
int bytes;
|
||||||
|
|
||||||
|
close(pipefd[1]);
|
||||||
|
bytes = read(pipefd[0], &new_ns_id, sizeof(new_ns_id));
|
||||||
|
close(pipefd[0]);
|
||||||
|
|
||||||
|
if (bytes == sizeof(new_ns_id)) {
|
||||||
|
__u64 ns_ids_during[100];
|
||||||
|
int ret_during;
|
||||||
|
|
||||||
|
TH_LOG("Child created namespace with ID %llu", (unsigned long long)new_ns_id);
|
||||||
|
|
||||||
|
/* List namespaces while child is still alive - should see new one */
|
||||||
|
ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
|
||||||
|
ASSERT_GE(ret_during, 0);
|
||||||
|
TH_LOG("During: %d active network namespaces", ret_during);
|
||||||
|
|
||||||
|
/* Should have more namespaces than before */
|
||||||
|
ASSERT_GE(ret_during, ret_before);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for child to exit */
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
|
||||||
|
/* Give time for namespace to become inactive */
|
||||||
|
usleep(100000);
|
||||||
|
|
||||||
|
/* List namespaces after child exits - should not see new one */
|
||||||
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
ASSERT_GE(ret_after, 0);
|
||||||
|
TH_LOG("After: %zd active network namespaces", ret_after);
|
||||||
|
|
||||||
|
/* Verify the new namespace ID is not in the after list */
|
||||||
|
if (new_ns_id != 0) {
|
||||||
|
bool found = false;
|
||||||
|
|
||||||
|
for (ssize_t i = 0; i < ret_after; i++) {
|
||||||
|
if (ns_ids_after[i] == new_ns_id) {
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ASSERT_FALSE(found);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() with specific user namespace ID.
|
||||||
|
* Create a user namespace and list namespaces it owns.
|
||||||
|
*/
|
||||||
|
TEST(listns_specific_userns)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0, /* Will be filled with created userns ID */
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
int sv[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
__u64 user_ns_id = 0;
|
||||||
|
int bytes;
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int fd;
|
||||||
|
__u64 ns_id;
|
||||||
|
char buf;
|
||||||
|
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
/* Create new user namespace */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get user namespace ID */
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &ns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Send ID to parent */
|
||||||
|
if (write(sv[1], &ns_id, sizeof(ns_id)) != sizeof(ns_id)) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create some namespaces owned by this user namespace */
|
||||||
|
unshare(CLONE_NEWNET);
|
||||||
|
unshare(CLONE_NEWUTS);
|
||||||
|
|
||||||
|
/* Wait for parent signal */
|
||||||
|
if (read(sv[1], &buf, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(sv[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(sv[1]);
|
||||||
|
bytes = read(sv[0], &user_ns_id, sizeof(user_ns_id));
|
||||||
|
|
||||||
|
if (bytes != sizeof(user_ns_id)) {
|
||||||
|
close(sv[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get user namespace ID from child");
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Child created user namespace with ID %llu", (unsigned long long)user_ns_id);
|
||||||
|
|
||||||
|
/* List namespaces owned by this user namespace */
|
||||||
|
req.user_ns_id = user_ns_id;
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
|
||||||
|
if (ret < 0) {
|
||||||
|
TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
|
||||||
|
close(sv[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
if (errno == ENOSYS) {
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Found %zd namespaces owned by user namespace %llu", ret,
|
||||||
|
(unsigned long long)user_ns_id);
|
||||||
|
|
||||||
|
/* Should find at least the network and UTS namespaces we created */
|
||||||
|
if (ret > 0) {
|
||||||
|
for (ssize_t i = 0; i < ret && i < 10; i++)
|
||||||
|
TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signal child to exit */
|
||||||
|
if (write(sv[0], "X", 1) != 1) {
|
||||||
|
close(sv[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
close(sv[0]);
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() with multiple namespace types filter.
|
||||||
|
*/
|
||||||
|
TEST(listns_multiple_types)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWNET | CLONE_NEWUTS, /* Network and UTS */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
TH_LOG("listns failed: %s (errno=%d)", strerror(errno), errno);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
|
||||||
|
TH_LOG("Found %zd active network/UTS namespaces", ret);
|
||||||
|
|
||||||
|
for (ssize_t i = 0; i < ret; i++)
|
||||||
|
TH_LOG(" [%zd] ns_id: %llu", i, (unsigned long long)ns_ids[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test that hierarchical active reference propagation keeps parent
|
||||||
|
* user namespaces visible in listns().
|
||||||
|
*/
|
||||||
|
TEST(listns_hierarchical_visibility)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 parent_ns_id = 0, child_ns_id = 0;
|
||||||
|
int sv[2];
|
||||||
|
pid_t pid;
|
||||||
|
int status;
|
||||||
|
int bytes;
|
||||||
|
__u64 ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
bool found_parent, found_child;
|
||||||
|
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
||||||
|
|
||||||
|
pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int fd;
|
||||||
|
char buf;
|
||||||
|
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
/* Create parent user namespace */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &parent_ns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Create child user namespace */
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
fd = open("/proc/self/ns/user", O_RDONLY);
|
||||||
|
if (fd < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ioctl(fd, NS_GET_ID, &child_ns_id) < 0) {
|
||||||
|
close(fd);
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(fd);
|
||||||
|
|
||||||
|
/* Send both IDs to parent */
|
||||||
|
if (write(sv[1], &parent_ns_id, sizeof(parent_ns_id)) != sizeof(parent_ns_id)) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
if (write(sv[1], &child_ns_id, sizeof(child_ns_id)) != sizeof(child_ns_id)) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for parent signal */
|
||||||
|
if (read(sv[1], &buf, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(sv[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent */
|
||||||
|
close(sv[1]);
|
||||||
|
|
||||||
|
/* Read both namespace IDs */
|
||||||
|
bytes = read(sv[0], &parent_ns_id, sizeof(parent_ns_id));
|
||||||
|
bytes += read(sv[0], &child_ns_id, sizeof(child_ns_id));
|
||||||
|
|
||||||
|
if (bytes != (int)(2 * sizeof(__u64))) {
|
||||||
|
close(sv[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "Failed to get namespace IDs from child");
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Parent user namespace ID: %llu", (unsigned long long)parent_ns_id);
|
||||||
|
TH_LOG("Child user namespace ID: %llu", (unsigned long long)child_ns_id);
|
||||||
|
|
||||||
|
/* List all user namespaces */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
|
||||||
|
if (ret < 0 && errno == ENOSYS) {
|
||||||
|
close(sv[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
}
|
||||||
|
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
TH_LOG("Found %zd active user namespaces", ret);
|
||||||
|
|
||||||
|
/* Both parent and child should be visible (active due to child process) */
|
||||||
|
found_parent = false;
|
||||||
|
found_child = false;
|
||||||
|
for (ssize_t i = 0; i < ret; i++) {
|
||||||
|
if (ns_ids[i] == parent_ns_id)
|
||||||
|
found_parent = true;
|
||||||
|
if (ns_ids[i] == child_ns_id)
|
||||||
|
found_child = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Parent namespace %s, child namespace %s",
|
||||||
|
found_parent ? "found" : "NOT FOUND",
|
||||||
|
found_child ? "found" : "NOT FOUND");
|
||||||
|
|
||||||
|
ASSERT_TRUE(found_child);
|
||||||
|
/* With hierarchical propagation, parent should also be active */
|
||||||
|
ASSERT_TRUE(found_parent);
|
||||||
|
|
||||||
|
/* Signal child to exit */
|
||||||
|
if (write(sv[0], "X", 1) != 1) {
|
||||||
|
close(sv[0]);
|
||||||
|
kill(pid, SIGKILL);
|
||||||
|
waitpid(pid, NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
close(sv[0]);
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test error cases for listns().
|
||||||
|
*/
|
||||||
|
TEST(listns_error_cases)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids[10];
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* Test with invalid flags */
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0xFFFF);
|
||||||
|
if (errno == ENOSYS) {
|
||||||
|
/* listns() not supported, skip this check */
|
||||||
|
} else {
|
||||||
|
ASSERT_LT(ret, 0);
|
||||||
|
ASSERT_EQ(errno, EINVAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test with NULL ns_ids array */
|
||||||
|
ret = sys_listns(&req, NULL, 10, 0);
|
||||||
|
ASSERT_LT(ret, 0);
|
||||||
|
|
||||||
|
/* Test with invalid spare field */
|
||||||
|
req.spare = 1;
|
||||||
|
ret = sys_listns(&req, ns_ids, ARRAY_SIZE(ns_ids), 0);
|
||||||
|
if (errno == ENOSYS) {
|
||||||
|
/* listns() not supported, skip this check */
|
||||||
|
} else {
|
||||||
|
ASSERT_LT(ret, 0);
|
||||||
|
ASSERT_EQ(errno, EINVAL);
|
||||||
|
}
|
||||||
|
req.spare = 0;
|
||||||
|
|
||||||
|
/* Test with huge nr_ns_ids */
|
||||||
|
ret = sys_listns(&req, ns_ids, 2000000, 0);
|
||||||
|
if (errno == ENOSYS) {
|
||||||
|
/* listns() not supported, skip this check */
|
||||||
|
} else {
|
||||||
|
ASSERT_LT(ret, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -6,6 +6,7 @@
|
||||||
#include <libgen.h>
|
#include <libgen.h>
|
||||||
#include <limits.h>
|
#include <limits.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
|
#include <signal.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include <sys/mount.h>
|
#include <sys/mount.h>
|
||||||
#include <poll.h>
|
#include <poll.h>
|
||||||
|
|
@ -14,12 +15,30 @@
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <sys/socket.h>
|
#include <sys/socket.h>
|
||||||
#include <sys/un.h>
|
#include <sys/un.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
#include <unistd.h>
|
#include <unistd.h>
|
||||||
#include <linux/fs.h>
|
#include <linux/fs.h>
|
||||||
#include <linux/limits.h>
|
#include <linux/limits.h>
|
||||||
#include <linux/nsfs.h>
|
#include <linux/nsfs.h>
|
||||||
#include "../kselftest_harness.h"
|
#include "../kselftest_harness.h"
|
||||||
|
|
||||||
|
/* Fixture for tests that create child processes */
|
||||||
|
FIXTURE(nsid) {
|
||||||
|
pid_t child_pid;
|
||||||
|
};
|
||||||
|
|
||||||
|
FIXTURE_SETUP(nsid) {
|
||||||
|
self->child_pid = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
FIXTURE_TEARDOWN(nsid) {
|
||||||
|
/* Clean up any child process that may still be running */
|
||||||
|
if (self->child_pid > 0) {
|
||||||
|
kill(self->child_pid, SIGKILL);
|
||||||
|
waitpid(self->child_pid, NULL, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
TEST(nsid_mntns_basic)
|
TEST(nsid_mntns_basic)
|
||||||
{
|
{
|
||||||
__u64 mnt_ns_id = 0;
|
__u64 mnt_ns_id = 0;
|
||||||
|
|
@ -44,7 +63,7 @@ TEST(nsid_mntns_basic)
|
||||||
close(fd_mntns);
|
close(fd_mntns);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_mntns_separate)
|
TEST_F(nsid, mntns_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_mnt_ns_id = 0;
|
__u64 parent_mnt_ns_id = 0;
|
||||||
__u64 child_mnt_ns_id = 0;
|
__u64 child_mnt_ns_id = 0;
|
||||||
|
|
@ -90,6 +109,9 @@ TEST(nsid_mntns_separate)
|
||||||
_exit(0);
|
_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -99,8 +121,6 @@ TEST(nsid_mntns_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_mntns);
|
close(fd_parent_mntns);
|
||||||
SKIP(return, "No permission to create mount namespace");
|
SKIP(return, "No permission to create mount namespace");
|
||||||
}
|
}
|
||||||
|
|
@ -123,10 +143,6 @@ TEST(nsid_mntns_separate)
|
||||||
|
|
||||||
close(fd_parent_mntns);
|
close(fd_parent_mntns);
|
||||||
close(fd_child_mntns);
|
close(fd_child_mntns);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_cgroupns_basic)
|
TEST(nsid_cgroupns_basic)
|
||||||
|
|
@ -153,7 +169,7 @@ TEST(nsid_cgroupns_basic)
|
||||||
close(fd_cgroupns);
|
close(fd_cgroupns);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_cgroupns_separate)
|
TEST_F(nsid, cgroupns_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_cgroup_ns_id = 0;
|
__u64 parent_cgroup_ns_id = 0;
|
||||||
__u64 child_cgroup_ns_id = 0;
|
__u64 child_cgroup_ns_id = 0;
|
||||||
|
|
@ -199,6 +215,9 @@ TEST(nsid_cgroupns_separate)
|
||||||
_exit(0);
|
_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -208,8 +227,6 @@ TEST(nsid_cgroupns_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_cgroupns);
|
close(fd_parent_cgroupns);
|
||||||
SKIP(return, "No permission to create cgroup namespace");
|
SKIP(return, "No permission to create cgroup namespace");
|
||||||
}
|
}
|
||||||
|
|
@ -232,10 +249,6 @@ TEST(nsid_cgroupns_separate)
|
||||||
|
|
||||||
close(fd_parent_cgroupns);
|
close(fd_parent_cgroupns);
|
||||||
close(fd_child_cgroupns);
|
close(fd_child_cgroupns);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_ipcns_basic)
|
TEST(nsid_ipcns_basic)
|
||||||
|
|
@ -262,7 +275,7 @@ TEST(nsid_ipcns_basic)
|
||||||
close(fd_ipcns);
|
close(fd_ipcns);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_ipcns_separate)
|
TEST_F(nsid, ipcns_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_ipc_ns_id = 0;
|
__u64 parent_ipc_ns_id = 0;
|
||||||
__u64 child_ipc_ns_id = 0;
|
__u64 child_ipc_ns_id = 0;
|
||||||
|
|
@ -308,6 +321,9 @@ TEST(nsid_ipcns_separate)
|
||||||
_exit(0);
|
_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -317,8 +333,6 @@ TEST(nsid_ipcns_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_ipcns);
|
close(fd_parent_ipcns);
|
||||||
SKIP(return, "No permission to create IPC namespace");
|
SKIP(return, "No permission to create IPC namespace");
|
||||||
}
|
}
|
||||||
|
|
@ -341,10 +355,6 @@ TEST(nsid_ipcns_separate)
|
||||||
|
|
||||||
close(fd_parent_ipcns);
|
close(fd_parent_ipcns);
|
||||||
close(fd_child_ipcns);
|
close(fd_child_ipcns);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_utsns_basic)
|
TEST(nsid_utsns_basic)
|
||||||
|
|
@ -371,7 +381,7 @@ TEST(nsid_utsns_basic)
|
||||||
close(fd_utsns);
|
close(fd_utsns);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_utsns_separate)
|
TEST_F(nsid, utsns_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_uts_ns_id = 0;
|
__u64 parent_uts_ns_id = 0;
|
||||||
__u64 child_uts_ns_id = 0;
|
__u64 child_uts_ns_id = 0;
|
||||||
|
|
@ -417,6 +427,9 @@ TEST(nsid_utsns_separate)
|
||||||
_exit(0);
|
_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -426,8 +439,6 @@ TEST(nsid_utsns_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_utsns);
|
close(fd_parent_utsns);
|
||||||
SKIP(return, "No permission to create UTS namespace");
|
SKIP(return, "No permission to create UTS namespace");
|
||||||
}
|
}
|
||||||
|
|
@ -450,10 +461,6 @@ TEST(nsid_utsns_separate)
|
||||||
|
|
||||||
close(fd_parent_utsns);
|
close(fd_parent_utsns);
|
||||||
close(fd_child_utsns);
|
close(fd_child_utsns);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_userns_basic)
|
TEST(nsid_userns_basic)
|
||||||
|
|
@ -480,7 +487,7 @@ TEST(nsid_userns_basic)
|
||||||
close(fd_userns);
|
close(fd_userns);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_userns_separate)
|
TEST_F(nsid, userns_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_user_ns_id = 0;
|
__u64 parent_user_ns_id = 0;
|
||||||
__u64 child_user_ns_id = 0;
|
__u64 child_user_ns_id = 0;
|
||||||
|
|
@ -526,6 +533,9 @@ TEST(nsid_userns_separate)
|
||||||
_exit(0);
|
_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -535,8 +545,6 @@ TEST(nsid_userns_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_userns);
|
close(fd_parent_userns);
|
||||||
SKIP(return, "No permission to create user namespace");
|
SKIP(return, "No permission to create user namespace");
|
||||||
}
|
}
|
||||||
|
|
@ -559,10 +567,6 @@ TEST(nsid_userns_separate)
|
||||||
|
|
||||||
close(fd_parent_userns);
|
close(fd_parent_userns);
|
||||||
close(fd_child_userns);
|
close(fd_child_userns);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_timens_basic)
|
TEST(nsid_timens_basic)
|
||||||
|
|
@ -591,7 +595,7 @@ TEST(nsid_timens_basic)
|
||||||
close(fd_timens);
|
close(fd_timens);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_timens_separate)
|
TEST_F(nsid, timens_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_time_ns_id = 0;
|
__u64 parent_time_ns_id = 0;
|
||||||
__u64 child_time_ns_id = 0;
|
__u64 child_time_ns_id = 0;
|
||||||
|
|
@ -652,6 +656,9 @@ TEST(nsid_timens_separate)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -660,8 +667,6 @@ TEST(nsid_timens_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_timens);
|
close(fd_parent_timens);
|
||||||
close(pipefd[0]);
|
close(pipefd[0]);
|
||||||
SKIP(return, "Cannot create time namespace");
|
SKIP(return, "Cannot create time namespace");
|
||||||
|
|
@ -689,10 +694,6 @@ TEST(nsid_timens_separate)
|
||||||
|
|
||||||
close(fd_parent_timens);
|
close(fd_parent_timens);
|
||||||
close(fd_child_timens);
|
close(fd_child_timens);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_pidns_basic)
|
TEST(nsid_pidns_basic)
|
||||||
|
|
@ -719,7 +720,7 @@ TEST(nsid_pidns_basic)
|
||||||
close(fd_pidns);
|
close(fd_pidns);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_pidns_separate)
|
TEST_F(nsid, pidns_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_pid_ns_id = 0;
|
__u64 parent_pid_ns_id = 0;
|
||||||
__u64 child_pid_ns_id = 0;
|
__u64 child_pid_ns_id = 0;
|
||||||
|
|
@ -776,6 +777,9 @@ TEST(nsid_pidns_separate)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -784,8 +788,6 @@ TEST(nsid_pidns_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_pidns);
|
close(fd_parent_pidns);
|
||||||
close(pipefd[0]);
|
close(pipefd[0]);
|
||||||
SKIP(return, "No permission to create PID namespace");
|
SKIP(return, "No permission to create PID namespace");
|
||||||
|
|
@ -813,10 +815,6 @@ TEST(nsid_pidns_separate)
|
||||||
|
|
||||||
close(fd_parent_pidns);
|
close(fd_parent_pidns);
|
||||||
close(fd_child_pidns);
|
close(fd_child_pidns);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_netns_basic)
|
TEST(nsid_netns_basic)
|
||||||
|
|
@ -860,7 +858,7 @@ TEST(nsid_netns_basic)
|
||||||
close(fd_netns);
|
close(fd_netns);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(nsid_netns_separate)
|
TEST_F(nsid, netns_separate)
|
||||||
{
|
{
|
||||||
__u64 parent_net_ns_id = 0;
|
__u64 parent_net_ns_id = 0;
|
||||||
__u64 parent_netns_cookie = 0;
|
__u64 parent_netns_cookie = 0;
|
||||||
|
|
@ -920,6 +918,9 @@ TEST(nsid_netns_separate)
|
||||||
_exit(0);
|
_exit(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Track child for cleanup */
|
||||||
|
self->child_pid = pid;
|
||||||
|
|
||||||
/* Parent process */
|
/* Parent process */
|
||||||
close(pipefd[1]);
|
close(pipefd[1]);
|
||||||
|
|
||||||
|
|
@ -929,8 +930,6 @@ TEST(nsid_netns_separate)
|
||||||
|
|
||||||
if (buf == 'S') {
|
if (buf == 'S') {
|
||||||
/* Child couldn't create namespace, skip test */
|
/* Child couldn't create namespace, skip test */
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
close(fd_parent_netns);
|
close(fd_parent_netns);
|
||||||
close(parent_sock);
|
close(parent_sock);
|
||||||
SKIP(return, "No permission to create network namespace");
|
SKIP(return, "No permission to create network namespace");
|
||||||
|
|
@ -977,10 +976,6 @@ TEST(nsid_netns_separate)
|
||||||
close(fd_parent_netns);
|
close(fd_parent_netns);
|
||||||
close(fd_child_netns);
|
close(fd_child_netns);
|
||||||
close(parent_sock);
|
close(parent_sock);
|
||||||
|
|
||||||
/* Clean up child process */
|
|
||||||
kill(pid, SIGTERM);
|
|
||||||
waitpid(pid, NULL, 0);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST_HARNESS_MAIN
|
TEST_HARNESS_MAIN
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,113 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <errno.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <signal.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include "../pidfd/pidfd.h"
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Regression tests for the setns(pidfd) active reference counting bug.
|
||||||
|
*
|
||||||
|
* These tests are based on the reproducers that triggered the race condition
|
||||||
|
* fixed by commit 1c465d0518dc ("ns: handle setns(pidfd, ...) cleanly").
|
||||||
|
*
|
||||||
|
* The bug: When using setns() with a pidfd, if the target task exits between
|
||||||
|
* prepare_nsset() and commit_nsset(), the namespaces would become inactive.
|
||||||
|
* Then ns_ref_active_get() would increment from 0 without properly resurrecting
|
||||||
|
* the owner chain, causing active reference count underflows.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Simple pidfd setns test using create_child()+unshare().
|
||||||
|
*
|
||||||
|
* Without the fix, this would trigger active refcount warnings when the
|
||||||
|
* parent exits after doing setns(pidfd) on a child that has already exited.
|
||||||
|
*/
|
||||||
|
TEST(simple_pidfd_setns)
|
||||||
|
{
|
||||||
|
pid_t child_pid;
|
||||||
|
int pidfd = -1;
|
||||||
|
int ret;
|
||||||
|
int sv[2];
|
||||||
|
char c;
|
||||||
|
|
||||||
|
/* Ignore SIGCHLD for autoreap */
|
||||||
|
ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
|
||||||
|
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
||||||
|
|
||||||
|
/* Create a child process without namespaces initially */
|
||||||
|
child_pid = create_child(&pidfd, 0);
|
||||||
|
ASSERT_GE(child_pid, 0);
|
||||||
|
|
||||||
|
if (child_pid == 0) {
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
if (unshare(CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET | CLONE_NEWUSER) < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
_exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signal parent that namespaces are ready */
|
||||||
|
if (write_nointr(sv[1], "1", 1) < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
_exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[1]);
|
||||||
|
_exit(0);
|
||||||
|
}
|
||||||
|
ASSERT_GE(pidfd, 0);
|
||||||
|
EXPECT_EQ(close(sv[1]), 0);
|
||||||
|
|
||||||
|
ret = read_nointr(sv[0], &c, 1);
|
||||||
|
ASSERT_EQ(ret, 1);
|
||||||
|
EXPECT_EQ(close(sv[0]), 0);
|
||||||
|
|
||||||
|
/* Set to child's namespaces via pidfd */
|
||||||
|
ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
|
||||||
|
TH_LOG("setns() returned %d", ret);
|
||||||
|
close(pidfd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Simple pidfd setns test using create_child().
|
||||||
|
*
|
||||||
|
* This variation uses create_child() with namespace flags directly.
|
||||||
|
* Namespaces are created immediately at clone time.
|
||||||
|
*/
|
||||||
|
TEST(simple_pidfd_setns_clone)
|
||||||
|
{
|
||||||
|
pid_t child_pid;
|
||||||
|
int pidfd = -1;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* Ignore SIGCHLD for autoreap */
|
||||||
|
ASSERT_NE(signal(SIGCHLD, SIG_IGN), SIG_ERR);
|
||||||
|
|
||||||
|
/* Create a child process with new namespaces using create_child() */
|
||||||
|
child_pid = create_child(&pidfd, CLONE_NEWUSER | CLONE_NEWUTS | CLONE_NEWIPC | CLONE_NEWNET);
|
||||||
|
ASSERT_GE(child_pid, 0);
|
||||||
|
|
||||||
|
if (child_pid == 0) {
|
||||||
|
/* Child: sleep for a while so parent can setns to us */
|
||||||
|
sleep(2);
|
||||||
|
_exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent: pidfd was already created by create_child() */
|
||||||
|
ASSERT_GE(pidfd, 0);
|
||||||
|
|
||||||
|
/* Set to child's namespaces via pidfd */
|
||||||
|
ret = setns(pidfd, CLONE_NEWUTS | CLONE_NEWIPC);
|
||||||
|
close(pidfd);
|
||||||
|
TH_LOG("setns() returned %d", ret);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
||||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,626 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <errno.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
#include <limits.h>
|
||||||
|
#include <sched.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <sys/ioctl.h>
|
||||||
|
#include <sys/socket.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <linux/nsfs.h>
|
||||||
|
#include "../kselftest_harness.h"
|
||||||
|
#include "../filesystems/utils.h"
|
||||||
|
#include "wrappers.h"
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Stress tests for namespace active reference counting.
|
||||||
|
*
|
||||||
|
* These tests validate that the active reference counting system can handle
|
||||||
|
* high load scenarios including rapid namespace creation/destruction, large
|
||||||
|
* numbers of concurrent namespaces, and various edge cases under stress.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test rapid creation and destruction of user namespaces.
|
||||||
|
* Create and destroy namespaces in quick succession to stress the
|
||||||
|
* active reference tracking and ensure no leaks occur.
|
||||||
|
*/
|
||||||
|
TEST(rapid_namespace_creation_destruction)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[256], ns_ids_after[256];
|
||||||
|
ssize_t ret_before, ret_after;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* Get baseline count of active user namespaces */
|
||||||
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
if (ret_before < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret_before, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Baseline: %zd active user namespaces", ret_before);
|
||||||
|
|
||||||
|
/* Rapidly create and destroy 100 user namespaces */
|
||||||
|
for (i = 0; i < 100; i++) {
|
||||||
|
pid_t pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child: create user namespace and immediately exit */
|
||||||
|
if (setup_userns() < 0)
|
||||||
|
exit(1);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent: wait for child */
|
||||||
|
int status;
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify we're back to baseline (no leaked namespaces) */
|
||||||
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
ASSERT_GE(ret_after, 0);
|
||||||
|
|
||||||
|
TH_LOG("After 100 rapid create/destroy cycles: %zd active user namespaces", ret_after);
|
||||||
|
ASSERT_EQ(ret_before, ret_after);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test creating many concurrent namespaces.
|
||||||
|
* Verify that listns() correctly tracks all of them and that they all
|
||||||
|
* become inactive after processes exit.
|
||||||
|
*/
|
||||||
|
TEST(many_concurrent_namespaces)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[512], ns_ids_during[512], ns_ids_after[512];
|
||||||
|
ssize_t ret_before, ret_during, ret_after;
|
||||||
|
pid_t pids[50];
|
||||||
|
int num_children = 50;
|
||||||
|
int i;
|
||||||
|
int sv[2];
|
||||||
|
|
||||||
|
/* Get baseline */
|
||||||
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
if (ret_before < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret_before, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Baseline: %zd active user namespaces", ret_before);
|
||||||
|
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
||||||
|
|
||||||
|
/* Create many children, each with their own user namespace */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
pids[i] = fork();
|
||||||
|
ASSERT_GE(pids[i], 0);
|
||||||
|
|
||||||
|
if (pids[i] == 0) {
|
||||||
|
/* Child: create user namespace and wait for parent signal */
|
||||||
|
char c;
|
||||||
|
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signal parent we're ready */
|
||||||
|
if (write(sv[1], &c, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for parent signal to exit */
|
||||||
|
if (read(sv[1], &c, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[1]);
|
||||||
|
|
||||||
|
/* Wait for all children to signal ready */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
char c;
|
||||||
|
if (read(sv[0], &c, 1) != 1) {
|
||||||
|
/* If we fail to read, kill all children and exit */
|
||||||
|
close(sv[0]);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
kill(pids[j], SIGKILL);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
waitpid(pids[j], NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* List namespaces while all children are running */
|
||||||
|
ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
|
||||||
|
ASSERT_GE(ret_during, 0);
|
||||||
|
|
||||||
|
TH_LOG("With %d children running: %zd active user namespaces", num_children, ret_during);
|
||||||
|
|
||||||
|
/* Should have at least num_children more namespaces than baseline */
|
||||||
|
ASSERT_GE(ret_during, ret_before + num_children);
|
||||||
|
|
||||||
|
/* Signal all children to exit */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
char c = 'X';
|
||||||
|
if (write(sv[0], &c, 1) != 1) {
|
||||||
|
/* If we fail to write, kill remaining children */
|
||||||
|
close(sv[0]);
|
||||||
|
for (int j = i; j < num_children; j++)
|
||||||
|
kill(pids[j], SIGKILL);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
waitpid(pids[j], NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
/* Wait for all children */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
int status;
|
||||||
|
waitpid(pids[i], &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify we're back to baseline */
|
||||||
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
ASSERT_GE(ret_after, 0);
|
||||||
|
|
||||||
|
TH_LOG("After all children exit: %zd active user namespaces", ret_after);
|
||||||
|
ASSERT_EQ(ret_before, ret_after);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test rapid namespace creation with different namespace types.
|
||||||
|
* Create multiple types of namespaces rapidly to stress the tracking system.
|
||||||
|
*/
|
||||||
|
TEST(rapid_mixed_namespace_creation)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0, /* All types */
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
||||||
|
ssize_t ret_before, ret_after;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* Get baseline count */
|
||||||
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
if (ret_before < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret_before, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Baseline: %zd active namespaces (all types)", ret_before);
|
||||||
|
|
||||||
|
/* Rapidly create and destroy namespaces with multiple types */
|
||||||
|
for (i = 0; i < 50; i++) {
|
||||||
|
pid_t pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
/* Child: create multiple namespace types */
|
||||||
|
if (setup_userns() < 0)
|
||||||
|
exit(1);
|
||||||
|
|
||||||
|
/* Create additional namespace types */
|
||||||
|
if (unshare(CLONE_NEWNET) < 0)
|
||||||
|
exit(1);
|
||||||
|
if (unshare(CLONE_NEWUTS) < 0)
|
||||||
|
exit(1);
|
||||||
|
if (unshare(CLONE_NEWIPC) < 0)
|
||||||
|
exit(1);
|
||||||
|
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent: wait for child */
|
||||||
|
int status;
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify we're back to baseline */
|
||||||
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
ASSERT_GE(ret_after, 0);
|
||||||
|
|
||||||
|
TH_LOG("After 50 rapid mixed namespace cycles: %zd active namespaces", ret_after);
|
||||||
|
ASSERT_EQ(ret_before, ret_after);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test nested namespace creation under stress.
|
||||||
|
* Create deeply nested namespace hierarchies and verify proper cleanup.
|
||||||
|
*/
|
||||||
|
TEST(nested_namespace_stress)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
||||||
|
ssize_t ret_before, ret_after;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* Get baseline */
|
||||||
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
if (ret_before < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret_before, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Baseline: %zd active user namespaces", ret_before);
|
||||||
|
|
||||||
|
/* Create 20 processes, each with nested user namespaces */
|
||||||
|
for (i = 0; i < 20; i++) {
|
||||||
|
pid_t pid = fork();
|
||||||
|
ASSERT_GE(pid, 0);
|
||||||
|
|
||||||
|
if (pid == 0) {
|
||||||
|
int userns_fd;
|
||||||
|
uid_t orig_uid = getuid();
|
||||||
|
int depth;
|
||||||
|
|
||||||
|
/* Create nested user namespaces (up to 5 levels) */
|
||||||
|
for (depth = 0; depth < 5; depth++) {
|
||||||
|
userns_fd = get_userns_fd(0, (depth == 0) ? orig_uid : 0, 1);
|
||||||
|
if (userns_fd < 0)
|
||||||
|
exit(1);
|
||||||
|
|
||||||
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
||||||
|
close(userns_fd);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
close(userns_fd);
|
||||||
|
}
|
||||||
|
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Parent: wait for child */
|
||||||
|
int status;
|
||||||
|
waitpid(pid, &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify we're back to baseline */
|
||||||
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
ASSERT_GE(ret_after, 0);
|
||||||
|
|
||||||
|
TH_LOG("After 20 nested namespace hierarchies: %zd active user namespaces", ret_after);
|
||||||
|
ASSERT_EQ(ret_before, ret_after);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test listns() pagination under stress.
|
||||||
|
* Create many namespaces and verify pagination works correctly.
|
||||||
|
*/
|
||||||
|
TEST(listns_pagination_stress)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
pid_t pids[30];
|
||||||
|
int num_children = 30;
|
||||||
|
int i;
|
||||||
|
int sv[2];
|
||||||
|
__u64 all_ns_ids[512];
|
||||||
|
int total_found = 0;
|
||||||
|
|
||||||
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
||||||
|
|
||||||
|
/* Create many children with user namespaces */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
pids[i] = fork();
|
||||||
|
ASSERT_GE(pids[i], 0);
|
||||||
|
|
||||||
|
if (pids[i] == 0) {
|
||||||
|
char c;
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
if (setup_userns() < 0) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signal parent we're ready */
|
||||||
|
if (write(sv[1], &c, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for parent signal to exit */
|
||||||
|
if (read(sv[1], &c, 1) != 1) {
|
||||||
|
close(sv[1]);
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[1]);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[1]);
|
||||||
|
|
||||||
|
/* Wait for all children to signal ready */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
char c;
|
||||||
|
if (read(sv[0], &c, 1) != 1) {
|
||||||
|
/* If we fail to read, kill all children and exit */
|
||||||
|
close(sv[0]);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
kill(pids[j], SIGKILL);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
waitpid(pids[j], NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Paginate through all namespaces using small batch sizes */
|
||||||
|
req.ns_id = 0;
|
||||||
|
while (1) {
|
||||||
|
__u64 batch[5]; /* Small batch size to force pagination */
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
ret = sys_listns(&req, batch, ARRAY_SIZE(batch), 0);
|
||||||
|
if (ret < 0) {
|
||||||
|
if (errno == ENOSYS) {
|
||||||
|
close(sv[0]);
|
||||||
|
for (i = 0; i < num_children; i++)
|
||||||
|
kill(pids[i], SIGKILL);
|
||||||
|
for (i = 0; i < num_children; i++)
|
||||||
|
waitpid(pids[i], NULL, 0);
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
}
|
||||||
|
ASSERT_GE(ret, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (ret == 0)
|
||||||
|
break;
|
||||||
|
|
||||||
|
/* Store results */
|
||||||
|
for (i = 0; i < ret && total_found < 512; i++) {
|
||||||
|
all_ns_ids[total_found++] = batch[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Update cursor for next batch */
|
||||||
|
if (ret == ARRAY_SIZE(batch))
|
||||||
|
req.ns_id = batch[ret - 1];
|
||||||
|
else
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Paginated through %d user namespaces", total_found);
|
||||||
|
|
||||||
|
/* Verify no duplicates in pagination */
|
||||||
|
for (i = 0; i < total_found; i++) {
|
||||||
|
for (int j = i + 1; j < total_found; j++) {
|
||||||
|
if (all_ns_ids[i] == all_ns_ids[j]) {
|
||||||
|
TH_LOG("Found duplicate ns_id: %llu at positions %d and %d",
|
||||||
|
(unsigned long long)all_ns_ids[i], i, j);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signal all children to exit */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
char c = 'X';
|
||||||
|
if (write(sv[0], &c, 1) != 1) {
|
||||||
|
close(sv[0]);
|
||||||
|
for (int j = i; j < num_children; j++)
|
||||||
|
kill(pids[j], SIGKILL);
|
||||||
|
for (int j = 0; j < num_children; j++)
|
||||||
|
waitpid(pids[j], NULL, 0);
|
||||||
|
ASSERT_TRUE(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
close(sv[0]);
|
||||||
|
|
||||||
|
/* Wait for all children */
|
||||||
|
for (i = 0; i < num_children; i++) {
|
||||||
|
int status;
|
||||||
|
waitpid(pids[i], &status, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test concurrent namespace operations.
|
||||||
|
* Multiple processes creating, querying, and destroying namespaces concurrently.
|
||||||
|
*/
|
||||||
|
TEST(concurrent_namespace_operations)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = 0,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
||||||
|
ssize_t ret_before, ret_after;
|
||||||
|
pid_t pids[20];
|
||||||
|
int num_workers = 20;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* Get baseline */
|
||||||
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
if (ret_before < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret_before, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Baseline: %zd active namespaces", ret_before);
|
||||||
|
|
||||||
|
/* Create worker processes that do concurrent operations */
|
||||||
|
for (i = 0; i < num_workers; i++) {
|
||||||
|
pids[i] = fork();
|
||||||
|
ASSERT_GE(pids[i], 0);
|
||||||
|
|
||||||
|
if (pids[i] == 0) {
|
||||||
|
/* Each worker: create namespaces, list them, repeat */
|
||||||
|
int iterations;
|
||||||
|
|
||||||
|
for (iterations = 0; iterations < 10; iterations++) {
|
||||||
|
int userns_fd;
|
||||||
|
__u64 temp_ns_ids[100];
|
||||||
|
ssize_t ret;
|
||||||
|
|
||||||
|
/* Create a user namespace */
|
||||||
|
userns_fd = get_userns_fd(0, getuid(), 1);
|
||||||
|
if (userns_fd < 0)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* List namespaces */
|
||||||
|
ret = sys_listns(&req, temp_ns_ids, ARRAY_SIZE(temp_ns_ids), 0);
|
||||||
|
(void)ret;
|
||||||
|
|
||||||
|
close(userns_fd);
|
||||||
|
|
||||||
|
/* Small delay */
|
||||||
|
usleep(1000);
|
||||||
|
}
|
||||||
|
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for all workers */
|
||||||
|
for (i = 0; i < num_workers; i++) {
|
||||||
|
int status;
|
||||||
|
waitpid(pids[i], &status, 0);
|
||||||
|
ASSERT_TRUE(WIFEXITED(status));
|
||||||
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify we're back to baseline */
|
||||||
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
ASSERT_GE(ret_after, 0);
|
||||||
|
|
||||||
|
TH_LOG("After concurrent operations: %zd active namespaces", ret_after);
|
||||||
|
ASSERT_EQ(ret_before, ret_after);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Test namespace churn - continuous creation and destruction.
|
||||||
|
* Simulates high-churn scenarios like container orchestration.
|
||||||
|
*/
|
||||||
|
TEST(namespace_churn)
|
||||||
|
{
|
||||||
|
struct ns_id_req req = {
|
||||||
|
.size = sizeof(req),
|
||||||
|
.spare = 0,
|
||||||
|
.ns_id = 0,
|
||||||
|
.ns_type = CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS,
|
||||||
|
.spare2 = 0,
|
||||||
|
.user_ns_id = 0,
|
||||||
|
};
|
||||||
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
||||||
|
ssize_t ret_before, ret_after;
|
||||||
|
int cycle;
|
||||||
|
|
||||||
|
/* Get baseline */
|
||||||
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
||||||
|
if (ret_before < 0) {
|
||||||
|
if (errno == ENOSYS)
|
||||||
|
SKIP(return, "listns() not supported");
|
||||||
|
ASSERT_GE(ret_before, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
TH_LOG("Baseline: %zd active namespaces", ret_before);
|
||||||
|
|
||||||
|
/* Simulate churn: batches of namespaces created and destroyed */
|
||||||
|
for (cycle = 0; cycle < 10; cycle++) {
|
||||||
|
pid_t batch_pids[10];
|
||||||
|
int i;
|
||||||
|
|
||||||
|
/* Create batch */
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
batch_pids[i] = fork();
|
||||||
|
ASSERT_GE(batch_pids[i], 0);
|
||||||
|
|
||||||
|
if (batch_pids[i] == 0) {
|
||||||
|
/* Create multiple namespace types */
|
||||||
|
if (setup_userns() < 0)
|
||||||
|
exit(1);
|
||||||
|
if (unshare(CLONE_NEWNET) < 0)
|
||||||
|
exit(1);
|
||||||
|
if (unshare(CLONE_NEWUTS) < 0)
|
||||||
|
exit(1);
|
||||||
|
|
||||||
|
/* Keep namespaces alive briefly */
|
||||||
|
usleep(10000);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Wait for batch to complete */
|
||||||
|
for (i = 0; i < 10; i++) {
|
||||||
|
int status;
|
||||||
|
waitpid(batch_pids[i], &status, 0);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Verify we're back to baseline */
|
||||||
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
||||||
|
ASSERT_GE(ret_after, 0);
|
||||||
|
|
||||||
|
TH_LOG("After 10 churn cycles (100 namespace sets): %zd active namespaces", ret_after);
|
||||||
|
ASSERT_EQ(ret_before, ret_after);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_HARNESS_MAIN
|
||||||
|
|
@ -0,0 +1,35 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
|
||||||
|
#include <linux/nsfs.h>
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <sys/syscall.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
|
||||||
|
#ifndef __SELFTESTS_NAMESPACES_WRAPPERS_H__
|
||||||
|
#define __SELFTESTS_NAMESPACES_WRAPPERS_H__
|
||||||
|
|
||||||
|
/*
 * Fallback definition of the listns() syscall number for toolchains whose
 * headers do not provide __NR_listns. Alpha and each of the three MIPS
 * ABIs use their own number; everything else falls back to 470.
 */
#ifndef __NR_listns
#if defined(__alpha__)
#define __NR_listns 580
#elif defined(_MIPS_SIM)
#if _MIPS_SIM == _MIPS_SIM_ABI32	/* o32 */
#define __NR_listns 4470
#elif _MIPS_SIM == _MIPS_SIM_NABI32	/* n32 */
#define __NR_listns 6470
#elif _MIPS_SIM == _MIPS_SIM_ABI64	/* n64 */
#define __NR_listns 5470
#endif
#else
#define __NR_listns 470
#endif
#endif /* __NR_listns */
|
||||||
|
|
||||||
|
static inline int sys_listns(const struct ns_id_req *req, __u64 *ns_ids,
|
||||||
|
size_t nr_ns_ids, unsigned int flags)
|
||||||
|
{
|
||||||
|
return syscall(__NR_listns, req, ns_ids, nr_ns_ids, flags);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* __SELFTESTS_NAMESPACES_WRAPPERS_H__ */
|
||||||
Loading…
Reference in New Issue