mirror of https://github.com/torvalds/linux.git
627 lines
14 KiB
C
627 lines
14 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
#define _GNU_SOURCE
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <limits.h>
|
|
#include <sched.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/syscall.h>
|
|
#include <sys/types.h>
|
|
#include <sys/wait.h>
|
|
#include <unistd.h>
|
|
#include <linux/nsfs.h>
|
|
#include "../kselftest_harness.h"
|
|
#include "../filesystems/utils.h"
|
|
#include "wrappers.h"
|
|
|
|
/*
 * Stress tests for namespace active reference counting.
 *
 * These tests validate that the active reference counting system can handle
 * high load scenarios including rapid namespace creation/destruction, large
 * numbers of concurrent namespaces, and various edge cases under stress.
 */
|
|
|
|
/*
|
|
* Test rapid creation and destruction of user namespaces.
|
|
* Create and destroy namespaces in quick succession to stress the
|
|
* active reference tracking and ensure no leaks occur.
|
|
*/
|
|
TEST(rapid_namespace_creation_destruction)
|
|
{
|
|
struct ns_id_req req = {
|
|
.size = sizeof(req),
|
|
.spare = 0,
|
|
.ns_id = 0,
|
|
.ns_type = CLONE_NEWUSER,
|
|
.spare2 = 0,
|
|
.user_ns_id = 0,
|
|
};
|
|
__u64 ns_ids_before[256], ns_ids_after[256];
|
|
ssize_t ret_before, ret_after;
|
|
int i;
|
|
|
|
/* Get baseline count of active user namespaces */
|
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
|
if (ret_before < 0) {
|
|
if (errno == ENOSYS)
|
|
SKIP(return, "listns() not supported");
|
|
ASSERT_GE(ret_before, 0);
|
|
}
|
|
|
|
TH_LOG("Baseline: %zd active user namespaces", ret_before);
|
|
|
|
/* Rapidly create and destroy 100 user namespaces */
|
|
for (i = 0; i < 100; i++) {
|
|
pid_t pid = fork();
|
|
ASSERT_GE(pid, 0);
|
|
|
|
if (pid == 0) {
|
|
/* Child: create user namespace and immediately exit */
|
|
if (setup_userns() < 0)
|
|
exit(1);
|
|
exit(0);
|
|
}
|
|
|
|
/* Parent: wait for child */
|
|
int status;
|
|
waitpid(pid, &status, 0);
|
|
ASSERT_TRUE(WIFEXITED(status));
|
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
|
}
|
|
|
|
/* Verify we're back to baseline (no leaked namespaces) */
|
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
|
ASSERT_GE(ret_after, 0);
|
|
|
|
TH_LOG("After 100 rapid create/destroy cycles: %zd active user namespaces", ret_after);
|
|
ASSERT_EQ(ret_before, ret_after);
|
|
}
|
|
|
|
/*
|
|
* Test creating many concurrent namespaces.
|
|
* Verify that listns() correctly tracks all of them and that they all
|
|
* become inactive after processes exit.
|
|
*/
|
|
TEST(many_concurrent_namespaces)
|
|
{
|
|
struct ns_id_req req = {
|
|
.size = sizeof(req),
|
|
.spare = 0,
|
|
.ns_id = 0,
|
|
.ns_type = CLONE_NEWUSER,
|
|
.spare2 = 0,
|
|
.user_ns_id = 0,
|
|
};
|
|
__u64 ns_ids_before[512], ns_ids_during[512], ns_ids_after[512];
|
|
ssize_t ret_before, ret_during, ret_after;
|
|
pid_t pids[50];
|
|
int num_children = 50;
|
|
int i;
|
|
int sv[2];
|
|
|
|
/* Get baseline */
|
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
|
if (ret_before < 0) {
|
|
if (errno == ENOSYS)
|
|
SKIP(return, "listns() not supported");
|
|
ASSERT_GE(ret_before, 0);
|
|
}
|
|
|
|
TH_LOG("Baseline: %zd active user namespaces", ret_before);
|
|
|
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
|
|
|
/* Create many children, each with their own user namespace */
|
|
for (i = 0; i < num_children; i++) {
|
|
pids[i] = fork();
|
|
ASSERT_GE(pids[i], 0);
|
|
|
|
if (pids[i] == 0) {
|
|
/* Child: create user namespace and wait for parent signal */
|
|
char c;
|
|
|
|
close(sv[0]);
|
|
|
|
if (setup_userns() < 0) {
|
|
close(sv[1]);
|
|
exit(1);
|
|
}
|
|
|
|
/* Signal parent we're ready */
|
|
if (write(sv[1], &c, 1) != 1) {
|
|
close(sv[1]);
|
|
exit(1);
|
|
}
|
|
|
|
/* Wait for parent signal to exit */
|
|
if (read(sv[1], &c, 1) != 1) {
|
|
close(sv[1]);
|
|
exit(1);
|
|
}
|
|
|
|
close(sv[1]);
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
close(sv[1]);
|
|
|
|
/* Wait for all children to signal ready */
|
|
for (i = 0; i < num_children; i++) {
|
|
char c;
|
|
if (read(sv[0], &c, 1) != 1) {
|
|
/* If we fail to read, kill all children and exit */
|
|
close(sv[0]);
|
|
for (int j = 0; j < num_children; j++)
|
|
kill(pids[j], SIGKILL);
|
|
for (int j = 0; j < num_children; j++)
|
|
waitpid(pids[j], NULL, 0);
|
|
ASSERT_TRUE(false);
|
|
}
|
|
}
|
|
|
|
/* List namespaces while all children are running */
|
|
ret_during = sys_listns(&req, ns_ids_during, ARRAY_SIZE(ns_ids_during), 0);
|
|
ASSERT_GE(ret_during, 0);
|
|
|
|
TH_LOG("With %d children running: %zd active user namespaces", num_children, ret_during);
|
|
|
|
/* Should have at least num_children more namespaces than baseline */
|
|
ASSERT_GE(ret_during, ret_before + num_children);
|
|
|
|
/* Signal all children to exit */
|
|
for (i = 0; i < num_children; i++) {
|
|
char c = 'X';
|
|
if (write(sv[0], &c, 1) != 1) {
|
|
/* If we fail to write, kill remaining children */
|
|
close(sv[0]);
|
|
for (int j = i; j < num_children; j++)
|
|
kill(pids[j], SIGKILL);
|
|
for (int j = 0; j < num_children; j++)
|
|
waitpid(pids[j], NULL, 0);
|
|
ASSERT_TRUE(false);
|
|
}
|
|
}
|
|
|
|
close(sv[0]);
|
|
|
|
/* Wait for all children */
|
|
for (i = 0; i < num_children; i++) {
|
|
int status;
|
|
waitpid(pids[i], &status, 0);
|
|
ASSERT_TRUE(WIFEXITED(status));
|
|
}
|
|
|
|
/* Verify we're back to baseline */
|
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
|
ASSERT_GE(ret_after, 0);
|
|
|
|
TH_LOG("After all children exit: %zd active user namespaces", ret_after);
|
|
ASSERT_EQ(ret_before, ret_after);
|
|
}
|
|
|
|
/*
|
|
* Test rapid namespace creation with different namespace types.
|
|
* Create multiple types of namespaces rapidly to stress the tracking system.
|
|
*/
|
|
TEST(rapid_mixed_namespace_creation)
|
|
{
|
|
struct ns_id_req req = {
|
|
.size = sizeof(req),
|
|
.spare = 0,
|
|
.ns_id = 0,
|
|
.ns_type = 0, /* All types */
|
|
.spare2 = 0,
|
|
.user_ns_id = 0,
|
|
};
|
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
|
ssize_t ret_before, ret_after;
|
|
int i;
|
|
|
|
/* Get baseline count */
|
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
|
if (ret_before < 0) {
|
|
if (errno == ENOSYS)
|
|
SKIP(return, "listns() not supported");
|
|
ASSERT_GE(ret_before, 0);
|
|
}
|
|
|
|
TH_LOG("Baseline: %zd active namespaces (all types)", ret_before);
|
|
|
|
/* Rapidly create and destroy namespaces with multiple types */
|
|
for (i = 0; i < 50; i++) {
|
|
pid_t pid = fork();
|
|
ASSERT_GE(pid, 0);
|
|
|
|
if (pid == 0) {
|
|
/* Child: create multiple namespace types */
|
|
if (setup_userns() < 0)
|
|
exit(1);
|
|
|
|
/* Create additional namespace types */
|
|
if (unshare(CLONE_NEWNET) < 0)
|
|
exit(1);
|
|
if (unshare(CLONE_NEWUTS) < 0)
|
|
exit(1);
|
|
if (unshare(CLONE_NEWIPC) < 0)
|
|
exit(1);
|
|
|
|
exit(0);
|
|
}
|
|
|
|
/* Parent: wait for child */
|
|
int status;
|
|
waitpid(pid, &status, 0);
|
|
ASSERT_TRUE(WIFEXITED(status));
|
|
}
|
|
|
|
/* Verify we're back to baseline */
|
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
|
ASSERT_GE(ret_after, 0);
|
|
|
|
TH_LOG("After 50 rapid mixed namespace cycles: %zd active namespaces", ret_after);
|
|
ASSERT_EQ(ret_before, ret_after);
|
|
}
|
|
|
|
/*
|
|
* Test nested namespace creation under stress.
|
|
* Create deeply nested namespace hierarchies and verify proper cleanup.
|
|
*/
|
|
TEST(nested_namespace_stress)
|
|
{
|
|
struct ns_id_req req = {
|
|
.size = sizeof(req),
|
|
.spare = 0,
|
|
.ns_id = 0,
|
|
.ns_type = CLONE_NEWUSER,
|
|
.spare2 = 0,
|
|
.user_ns_id = 0,
|
|
};
|
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
|
ssize_t ret_before, ret_after;
|
|
int i;
|
|
|
|
/* Get baseline */
|
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
|
if (ret_before < 0) {
|
|
if (errno == ENOSYS)
|
|
SKIP(return, "listns() not supported");
|
|
ASSERT_GE(ret_before, 0);
|
|
}
|
|
|
|
TH_LOG("Baseline: %zd active user namespaces", ret_before);
|
|
|
|
/* Create 20 processes, each with nested user namespaces */
|
|
for (i = 0; i < 20; i++) {
|
|
pid_t pid = fork();
|
|
ASSERT_GE(pid, 0);
|
|
|
|
if (pid == 0) {
|
|
int userns_fd;
|
|
uid_t orig_uid = getuid();
|
|
int depth;
|
|
|
|
/* Create nested user namespaces (up to 5 levels) */
|
|
for (depth = 0; depth < 5; depth++) {
|
|
userns_fd = get_userns_fd(0, (depth == 0) ? orig_uid : 0, 1);
|
|
if (userns_fd < 0)
|
|
exit(1);
|
|
|
|
if (setns(userns_fd, CLONE_NEWUSER) < 0) {
|
|
close(userns_fd);
|
|
exit(1);
|
|
}
|
|
close(userns_fd);
|
|
}
|
|
|
|
exit(0);
|
|
}
|
|
|
|
/* Parent: wait for child */
|
|
int status;
|
|
waitpid(pid, &status, 0);
|
|
ASSERT_TRUE(WIFEXITED(status));
|
|
}
|
|
|
|
/* Verify we're back to baseline */
|
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
|
ASSERT_GE(ret_after, 0);
|
|
|
|
TH_LOG("After 20 nested namespace hierarchies: %zd active user namespaces", ret_after);
|
|
ASSERT_EQ(ret_before, ret_after);
|
|
}
|
|
|
|
/*
|
|
* Test listns() pagination under stress.
|
|
* Create many namespaces and verify pagination works correctly.
|
|
*/
|
|
TEST(listns_pagination_stress)
|
|
{
|
|
struct ns_id_req req = {
|
|
.size = sizeof(req),
|
|
.spare = 0,
|
|
.ns_id = 0,
|
|
.ns_type = CLONE_NEWUSER,
|
|
.spare2 = 0,
|
|
.user_ns_id = 0,
|
|
};
|
|
pid_t pids[30];
|
|
int num_children = 30;
|
|
int i;
|
|
int sv[2];
|
|
__u64 all_ns_ids[512];
|
|
int total_found = 0;
|
|
|
|
ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sv), 0);
|
|
|
|
/* Create many children with user namespaces */
|
|
for (i = 0; i < num_children; i++) {
|
|
pids[i] = fork();
|
|
ASSERT_GE(pids[i], 0);
|
|
|
|
if (pids[i] == 0) {
|
|
char c;
|
|
close(sv[0]);
|
|
|
|
if (setup_userns() < 0) {
|
|
close(sv[1]);
|
|
exit(1);
|
|
}
|
|
|
|
/* Signal parent we're ready */
|
|
if (write(sv[1], &c, 1) != 1) {
|
|
close(sv[1]);
|
|
exit(1);
|
|
}
|
|
|
|
/* Wait for parent signal to exit */
|
|
if (read(sv[1], &c, 1) != 1) {
|
|
close(sv[1]);
|
|
exit(1);
|
|
}
|
|
|
|
close(sv[1]);
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
close(sv[1]);
|
|
|
|
/* Wait for all children to signal ready */
|
|
for (i = 0; i < num_children; i++) {
|
|
char c;
|
|
if (read(sv[0], &c, 1) != 1) {
|
|
/* If we fail to read, kill all children and exit */
|
|
close(sv[0]);
|
|
for (int j = 0; j < num_children; j++)
|
|
kill(pids[j], SIGKILL);
|
|
for (int j = 0; j < num_children; j++)
|
|
waitpid(pids[j], NULL, 0);
|
|
ASSERT_TRUE(false);
|
|
}
|
|
}
|
|
|
|
/* Paginate through all namespaces using small batch sizes */
|
|
req.ns_id = 0;
|
|
while (1) {
|
|
__u64 batch[5]; /* Small batch size to force pagination */
|
|
ssize_t ret;
|
|
|
|
ret = sys_listns(&req, batch, ARRAY_SIZE(batch), 0);
|
|
if (ret < 0) {
|
|
if (errno == ENOSYS) {
|
|
close(sv[0]);
|
|
for (i = 0; i < num_children; i++)
|
|
kill(pids[i], SIGKILL);
|
|
for (i = 0; i < num_children; i++)
|
|
waitpid(pids[i], NULL, 0);
|
|
SKIP(return, "listns() not supported");
|
|
}
|
|
ASSERT_GE(ret, 0);
|
|
}
|
|
|
|
if (ret == 0)
|
|
break;
|
|
|
|
/* Store results */
|
|
for (i = 0; i < ret && total_found < 512; i++) {
|
|
all_ns_ids[total_found++] = batch[i];
|
|
}
|
|
|
|
/* Update cursor for next batch */
|
|
if (ret == ARRAY_SIZE(batch))
|
|
req.ns_id = batch[ret - 1];
|
|
else
|
|
break;
|
|
}
|
|
|
|
TH_LOG("Paginated through %d user namespaces", total_found);
|
|
|
|
/* Verify no duplicates in pagination */
|
|
for (i = 0; i < total_found; i++) {
|
|
for (int j = i + 1; j < total_found; j++) {
|
|
if (all_ns_ids[i] == all_ns_ids[j]) {
|
|
TH_LOG("Found duplicate ns_id: %llu at positions %d and %d",
|
|
(unsigned long long)all_ns_ids[i], i, j);
|
|
ASSERT_TRUE(false);
|
|
}
|
|
}
|
|
}
|
|
|
|
/* Signal all children to exit */
|
|
for (i = 0; i < num_children; i++) {
|
|
char c = 'X';
|
|
if (write(sv[0], &c, 1) != 1) {
|
|
close(sv[0]);
|
|
for (int j = i; j < num_children; j++)
|
|
kill(pids[j], SIGKILL);
|
|
for (int j = 0; j < num_children; j++)
|
|
waitpid(pids[j], NULL, 0);
|
|
ASSERT_TRUE(false);
|
|
}
|
|
}
|
|
|
|
close(sv[0]);
|
|
|
|
/* Wait for all children */
|
|
for (i = 0; i < num_children; i++) {
|
|
int status;
|
|
waitpid(pids[i], &status, 0);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Test concurrent namespace operations.
|
|
* Multiple processes creating, querying, and destroying namespaces concurrently.
|
|
*/
|
|
TEST(concurrent_namespace_operations)
|
|
{
|
|
struct ns_id_req req = {
|
|
.size = sizeof(req),
|
|
.spare = 0,
|
|
.ns_id = 0,
|
|
.ns_type = 0,
|
|
.spare2 = 0,
|
|
.user_ns_id = 0,
|
|
};
|
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
|
ssize_t ret_before, ret_after;
|
|
pid_t pids[20];
|
|
int num_workers = 20;
|
|
int i;
|
|
|
|
/* Get baseline */
|
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
|
if (ret_before < 0) {
|
|
if (errno == ENOSYS)
|
|
SKIP(return, "listns() not supported");
|
|
ASSERT_GE(ret_before, 0);
|
|
}
|
|
|
|
TH_LOG("Baseline: %zd active namespaces", ret_before);
|
|
|
|
/* Create worker processes that do concurrent operations */
|
|
for (i = 0; i < num_workers; i++) {
|
|
pids[i] = fork();
|
|
ASSERT_GE(pids[i], 0);
|
|
|
|
if (pids[i] == 0) {
|
|
/* Each worker: create namespaces, list them, repeat */
|
|
int iterations;
|
|
|
|
for (iterations = 0; iterations < 10; iterations++) {
|
|
int userns_fd;
|
|
__u64 temp_ns_ids[100];
|
|
ssize_t ret;
|
|
|
|
/* Create a user namespace */
|
|
userns_fd = get_userns_fd(0, getuid(), 1);
|
|
if (userns_fd < 0)
|
|
continue;
|
|
|
|
/* List namespaces */
|
|
ret = sys_listns(&req, temp_ns_ids, ARRAY_SIZE(temp_ns_ids), 0);
|
|
(void)ret;
|
|
|
|
close(userns_fd);
|
|
|
|
/* Small delay */
|
|
usleep(1000);
|
|
}
|
|
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
/* Wait for all workers */
|
|
for (i = 0; i < num_workers; i++) {
|
|
int status;
|
|
waitpid(pids[i], &status, 0);
|
|
ASSERT_TRUE(WIFEXITED(status));
|
|
ASSERT_EQ(WEXITSTATUS(status), 0);
|
|
}
|
|
|
|
/* Verify we're back to baseline */
|
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
|
ASSERT_GE(ret_after, 0);
|
|
|
|
TH_LOG("After concurrent operations: %zd active namespaces", ret_after);
|
|
ASSERT_EQ(ret_before, ret_after);
|
|
}
|
|
|
|
/*
|
|
* Test namespace churn - continuous creation and destruction.
|
|
* Simulates high-churn scenarios like container orchestration.
|
|
*/
|
|
TEST(namespace_churn)
|
|
{
|
|
struct ns_id_req req = {
|
|
.size = sizeof(req),
|
|
.spare = 0,
|
|
.ns_id = 0,
|
|
.ns_type = CLONE_NEWUSER | CLONE_NEWNET | CLONE_NEWUTS,
|
|
.spare2 = 0,
|
|
.user_ns_id = 0,
|
|
};
|
|
__u64 ns_ids_before[512], ns_ids_after[512];
|
|
ssize_t ret_before, ret_after;
|
|
int cycle;
|
|
|
|
/* Get baseline */
|
|
ret_before = sys_listns(&req, ns_ids_before, ARRAY_SIZE(ns_ids_before), 0);
|
|
if (ret_before < 0) {
|
|
if (errno == ENOSYS)
|
|
SKIP(return, "listns() not supported");
|
|
ASSERT_GE(ret_before, 0);
|
|
}
|
|
|
|
TH_LOG("Baseline: %zd active namespaces", ret_before);
|
|
|
|
/* Simulate churn: batches of namespaces created and destroyed */
|
|
for (cycle = 0; cycle < 10; cycle++) {
|
|
pid_t batch_pids[10];
|
|
int i;
|
|
|
|
/* Create batch */
|
|
for (i = 0; i < 10; i++) {
|
|
batch_pids[i] = fork();
|
|
ASSERT_GE(batch_pids[i], 0);
|
|
|
|
if (batch_pids[i] == 0) {
|
|
/* Create multiple namespace types */
|
|
if (setup_userns() < 0)
|
|
exit(1);
|
|
if (unshare(CLONE_NEWNET) < 0)
|
|
exit(1);
|
|
if (unshare(CLONE_NEWUTS) < 0)
|
|
exit(1);
|
|
|
|
/* Keep namespaces alive briefly */
|
|
usleep(10000);
|
|
exit(0);
|
|
}
|
|
}
|
|
|
|
/* Wait for batch to complete */
|
|
for (i = 0; i < 10; i++) {
|
|
int status;
|
|
waitpid(batch_pids[i], &status, 0);
|
|
}
|
|
}
|
|
|
|
/* Verify we're back to baseline */
|
|
ret_after = sys_listns(&req, ns_ids_after, ARRAY_SIZE(ns_ids_after), 0);
|
|
ASSERT_GE(ret_after, 0);
|
|
|
|
TH_LOG("After 10 churn cycles (100 namespace sets): %zd active namespaces", ret_after);
|
|
ASSERT_EQ(ret_before, ret_after);
|
|
}
|
|
|
|
/*
 * Harness macro from ../kselftest_harness.h; presumably expands to the
 * program entry point that runs the TEST() cases in this file - confirm
 * against the header.
 */
TEST_HARNESS_MAIN
|