From 4e97bae1b412cd6ed8053b3d8a242122952985cc Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:40 +0100 Subject: [PATCH 01/36] cleanup: fix scoped_class() This is a class, not a guard so why on earth is it checking for guard pointers or conditional lock acquisition? None of it makes any sense at all. I'm not sure what happened back then. Maybe I had a brief psychedelic period that I completely forgot about and spaced out into a zone where that initial macro implementation made any sense at all. Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-1-cb3ec8711a6a@kernel.org Fixes: 5c21c5f22d07 ("cleanup: add a scoped version of CLASS()") Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/cleanup.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 2573585b7f06..19c7e475d3a4 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -290,15 +290,16 @@ static inline class_##_name##_t class_##_name##ext##_constructor(_init_args) \ class_##_name##_t var __cleanup(class_##_name##_destructor) = \ class_##_name##_constructor -#define scoped_class(_name, var, args) \ - for (CLASS(_name, var)(args); \ - __guard_ptr(_name)(&var) || !__is_cond_ptr(_name); \ - ({ goto _label; })) \ - if (0) { \ -_label: \ - break; \ +#define __scoped_class(_name, var, _label, args...) \ + for (CLASS(_name, var)(args); ; ({ goto _label; })) \ + if (0) { \ +_label: \ + break; \ } else +#define scoped_class(_name, var, args...) \ + __scoped_class(_name, var, __UNIQUE_ID(label), args) + /* * DEFINE_GUARD(name, type, lock, unlock): * trivial wrapper around DEFINE_CLASS() above specifically From 4c7ceeb62d3330b6fb2b549ae833a92c0f481f3e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:41 +0100 Subject: [PATCH 02/36] cred: add kernel_cred() helper Access kernel creds based off of init_task. 
This will let us avoid any direct access to init_cred. Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-2-cb3ec8711a6a@kernel.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/cred.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/linux/cred.h b/include/linux/cred.h index 89ae50ad2ace..8ab3718184ad 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -20,6 +20,8 @@ struct cred; struct inode; +extern struct task_struct init_task; + /* * COW Supplementary groups list */ @@ -156,6 +158,11 @@ extern struct cred *prepare_exec_creds(void); extern int commit_creds(struct cred *); extern void abort_creds(struct cred *); extern struct cred *prepare_kernel_cred(struct task_struct *); +static inline const struct cred *kernel_cred(void) +{ + /* shut up sparse */ + return rcu_dereference_raw(init_task.cred); +} extern int set_security_override(struct cred *, u32); extern int set_security_override_from_ctx(struct cred *, const char *); extern int set_create_files_as(struct cred *, struct inode *); From 40314c2818b700da695c9686348be7aef9e156a2 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:42 +0100 Subject: [PATCH 03/36] cred: make init_cred static There's zero need to expose struct init_cred. The very few places that need access can just go through init_task which is already exported. 
Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-3-cb3ec8711a6a@kernel.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/init_task.h | 1 - init/init_task.c | 27 +++++++++++++++++++++++++++ kernel/cred.c | 27 --------------------------- security/keys/process_keys.c | 2 +- 4 files changed, 28 insertions(+), 29 deletions(-) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index bccb3f1f6262..a6cb241ea00c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,7 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; extern struct nsproxy init_nsproxy; -extern struct cred init_cred; #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #define INIT_PREV_CPUTIME(x) .prev_cputime = { \ diff --git a/init/init_task.c b/init/init_task.c index a55e2189206f..d970a847b657 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -62,6 +62,33 @@ unsigned long init_shadow_call_stack[SCS_SIZE / sizeof(long)] = { }; #endif +/* init to 2 - one for init_task, one to ensure it is never freed */ +static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; + +/* + * The initial credentials for the initial task + */ +static struct cred init_cred = { + .usage = ATOMIC_INIT(4), + .uid = GLOBAL_ROOT_UID, + .gid = GLOBAL_ROOT_GID, + .suid = GLOBAL_ROOT_UID, + .sgid = GLOBAL_ROOT_GID, + .euid = GLOBAL_ROOT_UID, + .egid = GLOBAL_ROOT_GID, + .fsuid = GLOBAL_ROOT_UID, + .fsgid = GLOBAL_ROOT_GID, + .securebits = SECUREBITS_DEFAULT, + .cap_inheritable = CAP_EMPTY_SET, + .cap_permitted = CAP_FULL_SET, + .cap_effective = CAP_FULL_SET, + .cap_bset = CAP_FULL_SET, + .user = INIT_USER, + .user_ns = &init_user_ns, + .group_info = &init_groups, + .ucounts = &init_ucounts, +}; + /* * Set up the first task table, touch at your own risk!. 
Base=0, * limit=0x1fffff (=2MB) diff --git a/kernel/cred.c b/kernel/cred.c index dbf6b687dc5c..ac87ed9d43b1 100644 --- a/kernel/cred.c +++ b/kernel/cred.c @@ -35,33 +35,6 @@ do { \ static struct kmem_cache *cred_jar; -/* init to 2 - one for init_task, one to ensure it is never freed */ -static struct group_info init_groups = { .usage = REFCOUNT_INIT(2) }; - -/* - * The initial credentials for the initial task - */ -struct cred init_cred = { - .usage = ATOMIC_INIT(4), - .uid = GLOBAL_ROOT_UID, - .gid = GLOBAL_ROOT_GID, - .suid = GLOBAL_ROOT_UID, - .sgid = GLOBAL_ROOT_GID, - .euid = GLOBAL_ROOT_UID, - .egid = GLOBAL_ROOT_GID, - .fsuid = GLOBAL_ROOT_UID, - .fsgid = GLOBAL_ROOT_GID, - .securebits = SECUREBITS_DEFAULT, - .cap_inheritable = CAP_EMPTY_SET, - .cap_permitted = CAP_FULL_SET, - .cap_effective = CAP_FULL_SET, - .cap_bset = CAP_FULL_SET, - .user = INIT_USER, - .user_ns = &init_user_ns, - .group_info = &init_groups, - .ucounts = &init_ucounts, -}; - /* * The RCU callback to actually dispose of a set of credentials */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index b5d5333ab330..a63c46bb2d14 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -51,7 +51,7 @@ static struct key *get_user_register(struct user_namespace *user_ns) if (!reg_keyring) { reg_keyring = keyring_alloc(".user_reg", user_ns->owner, INVALID_GID, - &init_cred, + kernel_cred(), KEY_POS_WRITE | KEY_POS_SEARCH | KEY_USR_VIEW | KEY_USR_READ, 0, From ae40e6c65791f47c76cc14d0cce2707fe6053f72 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:43 +0100 Subject: [PATCH 04/36] cred: add scoped_with_kernel_creds() Add a new cleanup class for override creds. We can make use of this in a bunch of places going forward. Based on this, add scoped_with_kernel_creds(), which can be used to temporarily assume kernel credentials for specific tasks such as firmware loading or coredump socket connections.
At no point will the caller interact with the kernel credentials directly. Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-4-cb3ec8711a6a@kernel.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- include/linux/cred.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/linux/cred.h b/include/linux/cred.h index 8ab3718184ad..be2cd07b174c 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -187,6 +187,14 @@ static inline const struct cred *revert_creds(const struct cred *revert_cred) return rcu_replace_pointer(current->cred, revert_cred, 1); } +DEFINE_CLASS(override_creds, + const struct cred *, + revert_creds(_T), + override_creds(override_cred), const struct cred *override_cred) + +#define scoped_with_kernel_creds() \ + scoped_class(override_creds, __UNIQUE_ID(cred), kernel_cred()) + /** * get_cred_many - Get references on a set of credentials * @cred: The credentials to reference From b9e3594e70193c84066b868e7a1eb38263d9a999 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:44 +0100 Subject: [PATCH 05/36] firmware: don't copy kernel creds No need to copy kernel credentials. 
Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-5-cb3ec8711a6a@kernel.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- drivers/base/firmware_loader/main.c | 59 ++++++++++++----------------- 1 file changed, 25 insertions(+), 34 deletions(-) diff --git a/drivers/base/firmware_loader/main.c b/drivers/base/firmware_loader/main.c index 6942c62fa59d..bee3050a20d9 100644 --- a/drivers/base/firmware_loader/main.c +++ b/drivers/base/firmware_loader/main.c @@ -829,8 +829,6 @@ _request_firmware(const struct firmware **firmware_p, const char *name, size_t offset, u32 opt_flags) { struct firmware *fw = NULL; - struct cred *kern_cred = NULL; - const struct cred *old_cred; bool nondirect = false; int ret; @@ -871,45 +869,38 @@ _request_firmware(const struct firmware **firmware_p, const char *name, * called by a driver when serving an unrelated request from userland, we use * the kernel credentials to read the file. */ - kern_cred = prepare_kernel_cred(&init_task); - if (!kern_cred) { - ret = -ENOMEM; - goto out; - } - old_cred = override_creds(kern_cred); + scoped_with_kernel_creds() { + ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); - ret = fw_get_filesystem_firmware(device, fw->priv, "", NULL); - - /* Only full reads can support decompression, platform, and sysfs. */ - if (!(opt_flags & FW_OPT_PARTIAL)) - nondirect = true; + /* Only full reads can support decompression, platform, and sysfs. 
*/ + if (!(opt_flags & FW_OPT_PARTIAL)) + nondirect = true; #ifdef CONFIG_FW_LOADER_COMPRESS_ZSTD - if (ret == -ENOENT && nondirect) - ret = fw_get_filesystem_firmware(device, fw->priv, ".zst", - fw_decompress_zstd); + if (ret == -ENOENT && nondirect) + ret = fw_get_filesystem_firmware(device, fw->priv, ".zst", + fw_decompress_zstd); #endif #ifdef CONFIG_FW_LOADER_COMPRESS_XZ - if (ret == -ENOENT && nondirect) - ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", - fw_decompress_xz); + if (ret == -ENOENT && nondirect) + ret = fw_get_filesystem_firmware(device, fw->priv, ".xz", + fw_decompress_xz); #endif - if (ret == -ENOENT && nondirect) - ret = firmware_fallback_platform(fw->priv); + if (ret == -ENOENT && nondirect) + ret = firmware_fallback_platform(fw->priv); - if (ret) { - if (!(opt_flags & FW_OPT_NO_WARN)) - dev_warn(device, - "Direct firmware load for %s failed with error %d\n", - name, ret); - if (nondirect) - ret = firmware_fallback_sysfs(fw, name, device, - opt_flags, ret); - } else - ret = assign_fw(fw, device); - - revert_creds(old_cred); - put_cred(kern_cred); + if (ret) { + if (!(opt_flags & FW_OPT_NO_WARN)) + dev_warn(device, + "Direct firmware load for %s failed with error %d\n", + name, ret); + if (nondirect) + ret = firmware_fallback_sysfs(fw, name, device, + opt_flags, ret); + } else { + ret = assign_fw(fw, device); + } + } out: if (ret < 0) { From 4601b7923d1b51b3788581b890a0d4d105a137de Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:45 +0100 Subject: [PATCH 06/36] nbd: don't copy kernel creds No need to copy kernel credentials. 
Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-6-cb3ec8711a6a@kernel.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- drivers/block/nbd.c | 50 +++++++++++++++++---------------------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/drivers/block/nbd.c b/drivers/block/nbd.c index a853c65ac65d..3263040fcf2d 100644 --- a/drivers/block/nbd.c +++ b/drivers/block/nbd.c @@ -52,7 +52,6 @@ static DEFINE_IDR(nbd_index_idr); static DEFINE_MUTEX(nbd_index_mutex); static struct workqueue_struct *nbd_del_wq; -static struct cred *nbd_cred; static int nbd_total_devices = 0; struct nbd_sock { @@ -555,7 +554,6 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, int result; struct msghdr msg = {} ; unsigned int noreclaim_flag; - const struct cred *old_cred; if (unlikely(!sock)) { dev_err_ratelimited(disk_to_dev(nbd->disk), @@ -564,34 +562,33 @@ static int __sock_xmit(struct nbd_device *nbd, struct socket *sock, int send, return -EINVAL; } - old_cred = override_creds(nbd_cred); - msg.msg_iter = *iter; noreclaim_flag = memalloc_noreclaim_save(); - do { - sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; - sock->sk->sk_use_task_frag = false; - msg.msg_flags = msg_flags | MSG_NOSIGNAL; - if (send) - result = sock_sendmsg(sock, &msg); - else - result = sock_recvmsg(sock, &msg, msg.msg_flags); + scoped_with_kernel_creds() { + do { + sock->sk->sk_allocation = GFP_NOIO | __GFP_MEMALLOC; + sock->sk->sk_use_task_frag = false; + msg.msg_flags = msg_flags | MSG_NOSIGNAL; - if (result <= 0) { - if (result == 0) - result = -EPIPE; /* short read */ - break; - } - if (sent) - *sent += result; - } while (msg_data_left(&msg)); + if (send) + result = sock_sendmsg(sock, &msg); + else + result = sock_recvmsg(sock, &msg, msg.msg_flags); + + if (result <= 0) { + if (result == 0) + result = -EPIPE; /* short read */ + break; + } + if (sent) + *sent += result; + } while (msg_data_left(&msg)); + } 
memalloc_noreclaim_restore(noreclaim_flag); - revert_creds(old_cred); - return result; } @@ -2683,15 +2680,7 @@ static int __init nbd_init(void) return -ENOMEM; } - nbd_cred = prepare_kernel_cred(&init_task); - if (!nbd_cred) { - destroy_workqueue(nbd_del_wq); - unregister_blkdev(NBD_MAJOR, "nbd"); - return -ENOMEM; - } - if (genl_register_family(&nbd_genl_family)) { - put_cred(nbd_cred); destroy_workqueue(nbd_del_wq); unregister_blkdev(NBD_MAJOR, "nbd"); return -EINVAL; @@ -2746,7 +2735,6 @@ static void __exit nbd_cleanup(void) /* Also wait for nbd_dev_remove_work() completes */ destroy_workqueue(nbd_del_wq); - put_cred(nbd_cred); idr_destroy(&nbd_index_idr); unregister_blkdev(NBD_MAJOR, "nbd"); } From 0f0e7cee3496cc053c7a1a15a428b585d6b7e897 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:46 +0100 Subject: [PATCH 07/36] target: don't copy kernel creds Get rid of all the boilerplate and tightly scope when the task runs with kernel creds. Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-7-cb3ec8711a6a@kernel.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- drivers/target/target_core_configfs.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index b19acd662726..9e51c535ba8c 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -3670,8 +3670,6 @@ static int __init target_core_init_configfs(void) { struct configfs_subsystem *subsys = &target_core_fabrics; struct t10_alua_lu_gp *lu_gp; - struct cred *kern_cred; - const struct cred *old_cred; int ret; pr_debug("TARGET_CORE[0]: Loading Generic Kernel Storage" @@ -3748,16 +3746,8 @@ static int __init target_core_init_configfs(void) if (ret < 0) goto out; - /* We use the kernel credentials to access the target directory */ - kern_cred = prepare_kernel_cred(&init_task); - if (!kern_cred) { - ret = -ENOMEM; - 
goto out; - } - old_cred = override_creds(kern_cred); - target_init_dbroot(); - revert_creds(old_cred); - put_cred(kern_cred); + scoped_with_kernel_creds() + target_init_dbroot(); return 0; From 1ad5b411afc327ae50e569dbfa15774e0baefa68 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 00:12:47 +0100 Subject: [PATCH 08/36] unix: don't copy creds No need to copy kernel credentials. Link: https://patch.msgid.link/20251103-work-creds-init_cred-v1-8-cb3ec8711a6a@kernel.org Reviewed-by: Jens Axboe Signed-off-by: Christian Brauner --- net/unix/af_unix.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 768098dec231..68c94f49f7b5 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1210,25 +1210,16 @@ static struct sock *unix_find_bsd(struct sockaddr_un *sunaddr, int addr_len, unix_mkname_bsd(sunaddr, addr_len); if (flags & SOCK_COREDUMP) { - const struct cred *cred; - struct cred *kcred; struct path root; - kcred = prepare_kernel_cred(&init_task); - if (!kcred) { - err = -ENOMEM; - goto fail; - } - task_lock(&init_task); get_fs_root(init_task.fs, &root); task_unlock(&init_task); - cred = override_creds(kcred); - err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path, - LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | - LOOKUP_NO_MAGICLINKS, &path); - put_cred(revert_creds(cred)); + scoped_with_kernel_creds() + err = vfs_path_lookup(root.dentry, root.mnt, sunaddr->sun_path, + LOOKUP_BENEATH | LOOKUP_NO_SYMLINKS | + LOOKUP_NO_MAGICLINKS, &path); path_put(&root); if (err) goto fail; From 019e52e8d324d568e71730946beb11e7b275ff08 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:49 +0100 Subject: [PATCH 09/36] cred: add scoped_with_creds() guards and implement scoped_with_kernel_creds() on top of it. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-1-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- include/linux/cred.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/cred.h b/include/linux/cred.h index be2cd07b174c..6ea2d81a740b 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -192,8 +192,10 @@ DEFINE_CLASS(override_creds, revert_creds(_T), override_creds(override_cred), const struct cred *override_cred) -#define scoped_with_kernel_creds() \ - scoped_class(override_creds, __UNIQUE_ID(cred), kernel_cred()) +#define scoped_with_creds(cred) \ + scoped_class(override_creds, __UNIQUE_ID(label), cred) + +#define scoped_with_kernel_creds() scoped_with_creds(kernel_cred()) /** * get_cred_many - Get references on a set of credentials From 84c1a329b4fce8e51958cd4f27bd62743b892a7d Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:50 +0100 Subject: [PATCH 10/36] aio: use credential guards Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-2-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/aio.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 5bc133386407..0a23a8c0717f 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1640,10 +1640,10 @@ static int aio_write(struct kiocb *req, const struct iocb *iocb, static void aio_fsync_work(struct work_struct *work) { struct aio_kiocb *iocb = container_of(work, struct aio_kiocb, fsync.work); - const struct cred *old_cred = override_creds(iocb->fsync.creds); - iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); - revert_creds(old_cred); + scoped_with_creds(iocb->fsync.creds) + iocb->ki_res.res = vfs_fsync(iocb->fsync.file, iocb->fsync.datasync); + put_cred(iocb->fsync.creds); iocb_put(iocb); } From 4f0a4825786a114898b6f10a1ffe95ac0402e57e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:51 +0100 Subject: [PATCH 11/36] backing-file: use credential guards for reads Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-3-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/backing-file.c | 52 +++++++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 24 deletions(-) diff --git a/fs/backing-file.c b/fs/backing-file.c index 15a7f8031084..37f3bf29595f 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -157,13 +157,37 @@ static int backing_aio_init_wq(struct kiocb *iocb) return sb_init_dio_done_wq(sb); } +static int do_backing_file_read_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags) +{ + struct backing_aio *aio = NULL; + int ret; + + if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(flags); + + return vfs_iter_read(file, iter, &iocb->ki_pos, rwf); + } + + aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); + if (!aio) + return -ENOMEM; + + aio->orig_iocb = iocb; + kiocb_clone(&aio->iocb, iocb, get_file(file)); + aio->iocb.ki_complete = backing_aio_rw_complete; + refcount_set(&aio->ref, 2); + ret = vfs_iocb_iter_read(file, &aio->iocb, iter); + backing_aio_put(aio); + if (ret != -EIOCBQUEUED) + backing_aio_cleanup(aio, ret); + return ret; +} ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, struct kiocb *iocb, int flags, struct backing_file_ctx *ctx) { - struct backing_aio *aio = NULL; - const struct cred *old_cred; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) @@ -176,28 +200,8 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, !(file->f_mode & FMODE_CAN_ODIRECT)) return -EINVAL; - old_cred = override_creds(ctx->cred); - if (is_sync_kiocb(iocb)) { - rwf_t rwf = iocb_to_rw_flags(flags); - - ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf); - } else { - ret = -ENOMEM; - aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); - if (!aio) - goto out; - - aio->orig_iocb = iocb; - kiocb_clone(&aio->iocb, iocb, get_file(file)); - 
aio->iocb.ki_complete = backing_aio_rw_complete; - refcount_set(&aio->ref, 2); - ret = vfs_iocb_iter_read(file, &aio->iocb, iter); - backing_aio_put(aio); - if (ret != -EIOCBQUEUED) - backing_aio_cleanup(aio, ret); - } -out: - revert_creds(old_cred); + scoped_with_creds(ctx->cred) + ret = do_backing_file_read_iter(file, iter, iocb, flags); if (ctx->accessed) ctx->accessed(iocb->ki_filp); From f119feaa06586aed78b98b13ac9bcfac942c583e Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:52 +0100 Subject: [PATCH 12/36] backing-file: use credential guards for writes Use credential guards for scoped credential override with automatic restoration on scope exit. Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-4-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/backing-file.c | 74 +++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/fs/backing-file.c b/fs/backing-file.c index 37f3bf29595f..2ba0f3c7f203 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -210,11 +210,47 @@ ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, } EXPORT_SYMBOL_GPL(backing_file_read_iter); +static int do_backing_file_write_iter(struct file *file, struct iov_iter *iter, + struct kiocb *iocb, int flags, + void (*end_write)(struct kiocb *, ssize_t)) +{ + struct backing_aio *aio; + int ret; + + if (is_sync_kiocb(iocb)) { + rwf_t rwf = iocb_to_rw_flags(flags); + + ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf); + if (end_write) + end_write(iocb, ret); + return ret; + } + + ret = backing_aio_init_wq(iocb); + if (ret) + return ret; + + aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); + if (!aio) + return -ENOMEM; + + aio->orig_iocb = iocb; + aio->end_write = end_write; + kiocb_clone(&aio->iocb, iocb, get_file(file)); + aio->iocb.ki_flags = flags; + aio->iocb.ki_complete = backing_aio_queue_completion; + 
refcount_set(&aio->ref, 2); + ret = vfs_iocb_iter_write(file, &aio->iocb, iter); + backing_aio_put(aio); + if (ret != -EIOCBQUEUED) + backing_aio_cleanup(aio, ret); + return ret; +} + ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, struct kiocb *iocb, int flags, struct backing_file_ctx *ctx) { - const struct cred *old_cred; ssize_t ret; if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) @@ -237,40 +273,8 @@ ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, */ flags &= ~IOCB_DIO_CALLER_COMP; - old_cred = override_creds(ctx->cred); - if (is_sync_kiocb(iocb)) { - rwf_t rwf = iocb_to_rw_flags(flags); - - ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf); - if (ctx->end_write) - ctx->end_write(iocb, ret); - } else { - struct backing_aio *aio; - - ret = backing_aio_init_wq(iocb); - if (ret) - goto out; - - ret = -ENOMEM; - aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); - if (!aio) - goto out; - - aio->orig_iocb = iocb; - aio->end_write = ctx->end_write; - kiocb_clone(&aio->iocb, iocb, get_file(file)); - aio->iocb.ki_flags = flags; - aio->iocb.ki_complete = backing_aio_queue_completion; - refcount_set(&aio->ref, 2); - ret = vfs_iocb_iter_write(file, &aio->iocb, iter); - backing_aio_put(aio); - if (ret != -EIOCBQUEUED) - backing_aio_cleanup(aio, ret); - } -out: - revert_creds(old_cred); - - return ret; + scoped_with_creds(ctx->cred) + return do_backing_file_write_iter(file, iter, iocb, flags, ctx->end_write); } EXPORT_SYMBOL_GPL(backing_file_write_iter); From c3076d146e312af1ee2eff0287e298cf20774b39 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:53 +0100 Subject: [PATCH 13/36] backing-file: use credential guards for splice read Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-5-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/backing-file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/backing-file.c b/fs/backing-file.c index 2ba0f3c7f203..4d4edf906ef3 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -283,15 +283,13 @@ ssize_t backing_file_splice_read(struct file *in, struct kiocb *iocb, unsigned int flags, struct backing_file_ctx *ctx) { - const struct cred *old_cred; ssize_t ret; if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING))) return -EIO; - old_cred = override_creds(ctx->cred); - ret = vfs_splice_read(in, &iocb->ki_pos, pipe, len, flags); - revert_creds(old_cred); + scoped_with_creds(ctx->cred) + ret = vfs_splice_read(in, &iocb->ki_pos, pipe, len, flags); if (ctx->accessed) ctx->accessed(iocb->ki_filp); From b688171f910e22d1a32dec24bae8dbecbf2fe395 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:54 +0100 Subject: [PATCH 14/36] backing-file: use credential guards for splice write Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-6-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/backing-file.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/fs/backing-file.c b/fs/backing-file.c index 4d4edf906ef3..87ff918320c5 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -303,7 +303,6 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, size_t len, unsigned int flags, struct backing_file_ctx *ctx) { - const struct cred *old_cred; ssize_t ret; if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING))) @@ -316,11 +315,11 @@ ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, if (ret) return ret; - old_cred = override_creds(ctx->cred); - file_start_write(out); - ret = out->f_op->splice_write(pipe, out, &iocb->ki_pos, len, flags); - file_end_write(out); - revert_creds(old_cred); + scoped_with_creds(ctx->cred) { + file_start_write(out); + ret = out->f_op->splice_write(pipe, out, &iocb->ki_pos, len, flags); + file_end_write(out); + } if (ctx->end_write) ctx->end_write(iocb, ret); From 6e1d1c1fa7b1a8d318ccb5f4f64b2a2b5803cf1c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:55 +0100 Subject: [PATCH 15/36] backing-file: use credential guards for mmap Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-7-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/backing-file.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/backing-file.c b/fs/backing-file.c index 87ff918320c5..ea137be16331 100644 --- a/fs/backing-file.c +++ b/fs/backing-file.c @@ -331,7 +331,6 @@ EXPORT_SYMBOL_GPL(backing_file_splice_write); int backing_file_mmap(struct file *file, struct vm_area_struct *vma, struct backing_file_ctx *ctx) { - const struct cred *old_cred; struct file *user_file = vma->vm_file; int ret; @@ -343,9 +342,8 @@ int backing_file_mmap(struct file *file, struct vm_area_struct *vma, vma_set_file(vma, file); - old_cred = override_creds(ctx->cred); - ret = vfs_mmap(vma->vm_file, vma); - revert_creds(old_cred); + scoped_with_creds(ctx->cred) + ret = vfs_mmap(vma->vm_file, vma); if (ctx->accessed) ctx->accessed(user_file); From ff2044cd277d8d2d6d6ea609d5a10fcbe68a23f9 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:56 +0100 Subject: [PATCH 16/36] binfmt_misc: use credential guards Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-8-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/binfmt_misc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index a839f960cd4a..558db4bd6c2a 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -782,8 +782,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, return PTR_ERR(e); if (e->flags & MISC_FMT_OPEN_FILE) { - const struct cred *old_cred; - /* * Now that we support unprivileged binfmt_misc mounts make * sure we use the credentials that the register @file was @@ -791,9 +789,8 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer, * didn't matter much as only a privileged process could open * the register file. */ - old_cred = override_creds(file->f_cred); - f = open_exec(e->interpreter); - revert_creds(old_cred); + scoped_with_creds(file->f_cred) + f = open_exec(e->interpreter); if (IS_ERR(f)) { pr_notice("register: failed to install interpreter file %s\n", e->interpreter); From 5e88d1aadcd20a8e2cf317839cac0c94006cee64 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:57 +0100 Subject: [PATCH 17/36] erofs: use credential guards Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-9-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/erofs/fileio.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/erofs/fileio.c b/fs/erofs/fileio.c index b7b3432a9882..d27938435b2f 100644 --- a/fs/erofs/fileio.c +++ b/fs/erofs/fileio.c @@ -47,7 +47,6 @@ static void erofs_fileio_ki_complete(struct kiocb *iocb, long ret) static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) { - const struct cred *old_cred; struct iov_iter iter; int ret; @@ -61,9 +60,8 @@ static void erofs_fileio_rq_submit(struct erofs_fileio_rq *rq) rq->iocb.ki_flags = IOCB_DIRECT; iov_iter_bvec(&iter, ITER_DEST, rq->bvecs, rq->bio.bi_vcnt, rq->bio.bi_iter.bi_size); - old_cred = override_creds(rq->iocb.ki_filp->f_cred); - ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); - revert_creds(old_cred); + scoped_with_creds(rq->iocb.ki_filp->f_cred) + ret = vfs_iocb_iter_read(rq->iocb.ki_filp, &rq->iocb, &iter); if (ret != -EIOCBQUEUED) erofs_fileio_ki_complete(&rq->iocb, ret); } From 94afb627dfc2ed8819a9eaa0ff51f0dd24839da8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:58 +0100 Subject: [PATCH 18/36] nfs: use credential guards in nfs_local_call_read() Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-10-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/nfs/localio.c | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 2c0455e91571..48bfe54b48a4 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -595,29 +595,26 @@ static void nfs_local_call_read(struct work_struct *work) struct nfs_local_kiocb *iocb = container_of(work, struct nfs_local_kiocb, work); struct file *filp = iocb->kiocb.ki_filp; - const struct cred *save_cred; ssize_t status; - save_cred = override_creds(filp->f_cred); + scoped_with_creds(filp->f_cred) { + for (int i = 0; i < iocb->n_iters ; i++) { + if (iocb->iter_is_dio_aligned[i]) { + iocb->kiocb.ki_flags |= IOCB_DIRECT; + iocb->kiocb.ki_complete = nfs_local_read_aio_complete; + iocb->aio_complete_work = nfs_local_read_aio_complete_work; + } - for (int i = 0; i < iocb->n_iters ; i++) { - if (iocb->iter_is_dio_aligned[i]) { - iocb->kiocb.ki_flags |= IOCB_DIRECT; - iocb->kiocb.ki_complete = nfs_local_read_aio_complete; - iocb->aio_complete_work = nfs_local_read_aio_complete_work; - } - - iocb->kiocb.ki_pos = iocb->offset[i]; - status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); - if (status != -EIOCBQUEUED) { - nfs_local_pgio_done(iocb->hdr, status); - if (iocb->hdr->task.tk_status) - break; + iocb->kiocb.ki_pos = iocb->offset[i]; + status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); + if (status != -EIOCBQUEUED) { + nfs_local_pgio_done(iocb->hdr, status); + if (iocb->hdr->task.tk_status) + break; + } } } - revert_creds(save_cred); - if (status != -EIOCBQUEUED) { nfs_local_read_done(iocb, status); nfs_local_pgio_release(iocb); From bff3c841f7bde5604b26e8ea755728b8e329c6d5 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:26:59 +0100 Subject: [PATCH 19/36] nfs: use credential guards in 
nfs_local_call_write() Use credential guards for scoped credential override with automatic restoration on scope exit. Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-11-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/nfs/localio.c | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/fs/nfs/localio.c b/fs/nfs/localio.c index 48bfe54b48a4..0c89a9d1e089 100644 --- a/fs/nfs/localio.c +++ b/fs/nfs/localio.c @@ -781,18 +781,11 @@ static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ } -static void nfs_local_call_write(struct work_struct *work) +static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb, + struct file *filp) { - struct nfs_local_kiocb *iocb = - container_of(work, struct nfs_local_kiocb, work); - struct file *filp = iocb->kiocb.ki_filp; - unsigned long old_flags = current->flags; - const struct cred *save_cred; ssize_t status; - current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; - save_cred = override_creds(filp->f_cred); - file_start_write(filp); for (int i = 0; i < iocb->n_iters ; i++) { if (iocb->iter_is_dio_aligned[i]) { @@ -837,7 +830,22 @@ static void nfs_local_call_write(struct work_struct *work) } file_end_write(filp); - revert_creds(save_cred); + return status; +} + +static void nfs_local_call_write(struct work_struct *work) +{ + struct nfs_local_kiocb *iocb = + container_of(work, struct nfs_local_kiocb, work); + struct file *filp = iocb->kiocb.ki_filp; + unsigned long old_flags = current->flags; + ssize_t status; + + current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; + + scoped_with_creds(filp->f_cred) + status = do_nfs_local_call_write(iocb, filp); + current->flags = old_flags; if (status != -EIOCBQUEUED) { From f41799b2e1697779748bb0a15d4aafffe9d2f8dd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 
12:27:00 +0100 Subject: [PATCH 20/36] nfs: use credential guards in nfs_idmap_get_key() Use credential guards for scoped credential override with automatic restoration on scope exit. Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-12-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/nfs/nfs4idmap.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/fs/nfs/nfs4idmap.c b/fs/nfs/nfs4idmap.c index 00932500fce4..9e1c48c5c0b8 100644 --- a/fs/nfs/nfs4idmap.c +++ b/fs/nfs/nfs4idmap.c @@ -306,15 +306,12 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen, const char *type, void *data, size_t data_size, struct idmap *idmap) { - const struct cred *saved_cred; struct key *rkey; const struct user_key_payload *payload; ssize_t ret; - saved_cred = override_creds(id_resolver_cache); - rkey = nfs_idmap_request_key(name, namelen, type, idmap); - revert_creds(saved_cred); - + scoped_with_creds(id_resolver_cache) + rkey = nfs_idmap_request_key(name, namelen, type, idmap); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); goto out; From c5c92c624aeb90d708db276ce2dd57db11fdb823 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:27:01 +0100 Subject: [PATCH 21/36] smb: use credential guards in cifs_get_spnego_key() Use credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-13-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- fs/smb/client/cifs_spnego.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/smb/client/cifs_spnego.c b/fs/smb/client/cifs_spnego.c index 9891f55bac1e..da935bd1ce87 100644 --- a/fs/smb/client/cifs_spnego.c +++ b/fs/smb/client/cifs_spnego.c @@ -90,7 +90,6 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, size_t desc_len; struct key *spnego_key; const char *hostname = server->hostname; - const struct cred *saved_cred; /* length of fields (with semicolons): ver=0xyz ip4=ipaddress host=hostname sec=mechanism uid=0xFF user=username */ @@ -158,9 +157,8 @@ cifs_get_spnego_key(struct cifs_ses *sesInfo, dp += sprintf(dp, ";upcall_target=app"); cifs_dbg(FYI, "key description = %s\n", description); - saved_cred = override_creds(spnego_cred); - spnego_key = request_key(&cifs_spnego_key_type, description, ""); - revert_creds(saved_cred); + scoped_with_creds(spnego_cred) + spnego_key = request_key(&cifs_spnego_key_type, description, ""); #ifdef CONFIG_CIFS_DEBUG2 if (cifsFYI && !IS_ERR(spnego_key)) { From 5db84abd2afb822594291faea8b6a1336c74db44 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:27:02 +0100 Subject: [PATCH 22/36] acct: use credential guards in acct_write_process() Use credential guards for scoped credential override with automatic restoration on scope exit.
Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-14-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- kernel/acct.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/kernel/acct.c b/kernel/acct.c index 61630110e29d..2a2b3c874acd 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -520,26 +520,23 @@ static void fill_ac(struct bsd_acct_struct *acct) static void acct_write_process(struct bsd_acct_struct *acct) { struct file *file = acct->file; - const struct cred *cred; acct_t *ac = &acct->ac; /* Perform file operations on behalf of whoever enabled accounting */ - cred = override_creds(file->f_cred); - - /* - * First check to see if there is enough free_space to continue - * the process accounting system. Then get freeze protection. If - * the fs is frozen, just skip the write as we could deadlock - * the system otherwise. - */ - if (check_free_space(acct) && file_start_write_trylock(file)) { - /* it's been opened O_APPEND, so position is irrelevant */ - loff_t pos = 0; - __kernel_write(file, ac, sizeof(acct_t), &pos); - file_end_write(file); + scoped_with_creds(file->f_cred) { + /* + * First check to see if there is enough free_space to continue + * the process accounting system. Then get freeze protection. If + * the fs is frozen, just skip the write as we could deadlock + * the system otherwise. 
+ */ + if (check_free_space(acct) && file_start_write_trylock(file)) { + /* it's been opened O_APPEND, so position is irrelevant */ + loff_t pos = 0; + __kernel_write(file, ac, sizeof(acct_t), &pos); + file_end_write(file); + } } - - revert_creds(cred); } static void do_acct_process(struct bsd_acct_struct *acct) From b66c7af4d86de00db3c28294467bf986083dc963 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:27:03 +0100 Subject: [PATCH 23/36] cgroup: use credential guards in cgroup_attach_permissions() Use credential guards for scoped credential override with automatic restoration on scope exit. Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-15-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- kernel/cgroup/cgroup.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index fdee387f0d6b..9f61f7cfc8d1 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5363,7 +5363,6 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, struct cgroup_file_ctx *ctx = of->priv; struct cgroup *src_cgrp, *dst_cgrp; struct task_struct *task; - const struct cred *saved_cred; ssize_t ret; enum cgroup_attach_lock_mode lock_mode; @@ -5386,11 +5385,10 @@ static ssize_t __cgroup_procs_write(struct kernfs_open_file *of, char *buf, * permissions using the credentials from file open to protect against * inherited fd attacks. 
*/ - saved_cred = override_creds(of->file->f_cred); - ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, - of->file->f_path.dentry->d_sb, - threadgroup, ctx->ns); - revert_creds(saved_cred); + scoped_with_creds(of->file->f_cred) + ret = cgroup_attach_permissions(src_cgrp, dst_cgrp, + of->file->f_path.dentry->d_sb, + threadgroup, ctx->ns); if (ret) goto out_finish; From 4037e28cd47e5a860ea23214024bcbe8a7585d81 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 12:27:04 +0100 Subject: [PATCH 24/36] net/dns_resolver: use credential guards in dns_query() Use credential guards for scoped credential override with automatic restoration on scope exit. Link: https://patch.msgid.link/20251103-work-creds-guards-simple-v1-16-a3e156839e7f@kernel.org Reviewed-by: Amir Goldstein Signed-off-by: Christian Brauner --- net/dns_resolver/dns_query.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index 82b084cc1cc6..53da62984447 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -78,7 +78,6 @@ int dns_query(struct net *net, { struct key *rkey; struct user_key_payload *upayload; - const struct cred *saved_cred; size_t typelen, desclen; char *desc, *cp; int ret, len; @@ -124,9 +123,8 @@ int dns_query(struct net *net, /* make the upcall, using special credentials to prevent the use of * add_key() to preinstall malicious redirections */ - saved_cred = override_creds(dns_resolver_cache); - rkey = request_key_net(&key_type_dns_resolver, desc, net, options); - revert_creds(saved_cred); + scoped_with_creds(dns_resolver_cache) + rkey = request_key_net(&key_type_dns_resolver, desc, net, options); kfree(desc); if (IS_ERR(rkey)) { ret = PTR_ERR(rkey); From c8ad3098e1272444b6c75910d6196a36f5c8bc17 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:27 +0100 Subject: [PATCH 25/36] cred: add prepare credential guard A lot of code uses the 
following pattern: * prepare new credentials * modify them for their use-case * drop them Make that pattern easier to support with the new guard infrastructure. Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-1-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- include/linux/cred.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/cred.h b/include/linux/cred.h index 6ea2d81a740b..343a140a6ba2 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -280,6 +280,11 @@ static inline void put_cred(const struct cred *cred) put_cred_many(cred, 1); } +DEFINE_CLASS(prepare_creds, + struct cred *, + if (_T) put_cred(_T), + prepare_creds(), void) + DEFINE_FREE(put_cred, struct cred *, if (!IS_ERR_OR_NULL(_T)) put_cred(_T)) /** From 4c5941ca1104d58a94e59100ebde97a162e72de4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:28 +0100 Subject: [PATCH 26/36] sev-dev: use guard for path Just use a guard and also move the path_put() out of the credential change's scope. There's no need to do this with the overridden credentials.
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-2-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- drivers/crypto/ccp/sev-dev.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index 0d13d47c164b..c5e22af04abb 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -259,8 +259,8 @@ static int sev_cmd_buffer_len(int cmd) static struct file *open_file_as_root(const char *filename, int flags, umode_t mode) { + struct path root __free(path_put) = {}; struct file *fp; - struct path root; struct cred *cred; const struct cred *old_cred; @@ -275,7 +275,6 @@ static struct file *open_file_as_root(const char *filename, int flags, umode_t m old_cred = override_creds(cred); fp = file_open_root(&root, filename, flags, mode); - path_put(&root); put_cred(revert_creds(old_cred)); From 89c545e29ecd6252968611b3ee2599034b911dd8 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:29 +0100 Subject: [PATCH 27/36] sev-dev: use prepare credential guard Use the prepare credential guard for allocating a new set of credentials. 
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-3-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- drivers/crypto/ccp/sev-dev.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index c5e22af04abb..be3e5454c285 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -261,22 +261,22 @@ static struct file *open_file_as_root(const char *filename, int flags, umode_t m { struct path root __free(path_put) = {}; struct file *fp; - struct cred *cred; const struct cred *old_cred; task_lock(&init_task); get_fs_root(init_task.fs, &root); task_unlock(&init_task); - cred = prepare_creds(); + CLASS(prepare_creds, cred)(); if (!cred) return ERR_PTR(-ENOMEM); + cred->fsuid = GLOBAL_ROOT_UID; old_cred = override_creds(cred); fp = file_open_root(&root, filename, flags, mode); - put_cred(revert_creds(old_cred)); + revert_creds(old_cred); return fp; } From b7b4f7554bcc6b9ee0ec0404999bf080adad1f3c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:30 +0100 Subject: [PATCH 28/36] sev-dev: use override credential guards Use override credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-4-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- drivers/crypto/ccp/sev-dev.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/crypto/ccp/sev-dev.c b/drivers/crypto/ccp/sev-dev.c index be3e5454c285..b28a6f50daaa 100644 --- a/drivers/crypto/ccp/sev-dev.c +++ b/drivers/crypto/ccp/sev-dev.c @@ -260,8 +260,6 @@ static int sev_cmd_buffer_len(int cmd) static struct file *open_file_as_root(const char *filename, int flags, umode_t mode) { struct path root __free(path_put) = {}; - struct file *fp; - const struct cred *old_cred; task_lock(&init_task); get_fs_root(init_task.fs, &root); @@ -272,13 +270,9 @@ static struct file *open_file_as_root(const char *filename, int flags, umode_t m return ERR_PTR(-ENOMEM); cred->fsuid = GLOBAL_ROOT_UID; - old_cred = override_creds(cred); - fp = file_open_root(&root, filename, flags, mode); - - revert_creds(old_cred); - - return fp; + scoped_with_creds(cred) + return file_open_root(&root, filename, flags, mode); } static int sev_read_init_ex_file(void) From eb937201bad03bf2f25ac630979e521fbb5e2a07 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:31 +0100 Subject: [PATCH 29/36] coredump: move revert_creds() before coredump_cleanup() There's no need to pin the credentials across the coredump_cleanup() call. Nothing in there depends on elevated credentials.
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-5-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/coredump.c b/fs/coredump.c index 5c1c381ee380..4fce2a2f279c 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1197,8 +1197,8 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) } close_fail: - coredump_cleanup(&cn, &cprm); revert_creds(old_cred); + coredump_cleanup(&cn, &cprm); return; } From 1ec760fb42404dd7257d1c73dd68295a0d1a974f Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:32 +0100 Subject: [PATCH 30/36] coredump: pass struct linux_binfmt as const We don't actually modify it. Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-6-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 4fce2a2f279c..590360ba0a28 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1036,7 +1036,7 @@ static bool coredump_pipe(struct core_name *cn, struct coredump_params *cprm, static bool coredump_write(struct core_name *cn, struct coredump_params *cprm, - struct linux_binfmt *binfmt) + const struct linux_binfmt *binfmt) { if (dump_interrupted()) @@ -1093,7 +1093,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct core_state core_state; struct core_name cn; struct mm_struct *mm = current->mm; - struct linux_binfmt *binfmt = mm->binfmt; + const struct linux_binfmt *binfmt = mm->binfmt; const struct cred *old_cred; int argc = 0; struct coredump_params cprm = { From 313a335057f0894e6e59290d4e7fb8b35ec250e6 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:33 +0100 Subject: [PATCH 31/36] coredump: mark struct mm_struct as const We don't actually modify it. 
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-7-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 2 +- include/linux/sched/coredump.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 590360ba0a28..8253b28bc728 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1092,7 +1092,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) size_t *argv __free(kfree) = NULL; struct core_state core_state; struct core_name cn; - struct mm_struct *mm = current->mm; + const struct mm_struct *mm = current->mm; const struct linux_binfmt *binfmt = mm->binfmt; const struct cred *old_cred; int argc = 0; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index b7fafe999073..624fda17a785 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -8,7 +8,7 @@ #define SUID_DUMP_USER 1 /* Dump as user of process */ #define SUID_DUMP_ROOT 2 /* Dump as root */ -static inline unsigned long __mm_flags_get_dumpable(struct mm_struct *mm) +static inline unsigned long __mm_flags_get_dumpable(const struct mm_struct *mm) { /* * By convention, dumpable bits are contained in first 32 bits of the From af9803d4b8ca3f59ec66bb6b1557e40a18bc5599 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:34 +0100 Subject: [PATCH 32/36] coredump: split out do_coredump() from vfs_coredump() Make the function easier to follow and prepare for some of the following changes. 
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-8-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 131 ++++++++++++++++++++++++++------------------------ 1 file changed, 68 insertions(+), 63 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 8253b28bc728..79c681f1d647 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1086,6 +1086,73 @@ static inline bool coredump_skip(const struct coredump_params *cprm, return false; } +static void do_coredump(struct core_name *cn, struct coredump_params *cprm, + size_t **argv, int *argc, const struct linux_binfmt *binfmt) +{ + if (!coredump_parse(cn, cprm, argv, argc)) { + coredump_report_failure("format_corename failed, aborting core"); + return; + } + + switch (cn->core_type) { + case COREDUMP_FILE: + if (!coredump_file(cn, cprm, binfmt)) + return; + break; + case COREDUMP_PIPE: + if (!coredump_pipe(cn, cprm, *argv, *argc)) + return; + break; + case COREDUMP_SOCK_REQ: + fallthrough; + case COREDUMP_SOCK: + if (!coredump_socket(cn, cprm)) + return; + break; + default: + WARN_ON_ONCE(true); + return; + } + + /* Don't even generate the coredump. */ + if (cn->mask & COREDUMP_REJECT) + return; + + /* get us an unshared descriptor table; almost always a no-op */ + /* The cell spufs coredump code reads the file descriptor tables */ + if (unshare_files()) + return; + + if ((cn->mask & COREDUMP_KERNEL) && !coredump_write(cn, cprm, binfmt)) + return; + + coredump_sock_shutdown(cprm->file); + + /* Let the parent know that a coredump was generated. */ + if (cn->mask & COREDUMP_USERSPACE) + cn->core_dumped = true; + + /* + * When core_pipe_limit is set we wait for the coredump server + * or usermodehelper to finish before exiting so it can e.g., + * inspect /proc/. 
+ */ + if (cn->mask & COREDUMP_WAIT) { + switch (cn->core_type) { + case COREDUMP_PIPE: + wait_for_dump_helpers(cprm->file); + break; + case COREDUMP_SOCK_REQ: + fallthrough; + case COREDUMP_SOCK: + coredump_sock_wait(cprm->file); + break; + default: + break; + } + } +} + void vfs_coredump(const kernel_siginfo_t *siginfo) { struct cred *cred __free(put_cred) = NULL; @@ -1133,70 +1200,8 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) old_cred = override_creds(cred); - if (!coredump_parse(&cn, &cprm, &argv, &argc)) { - coredump_report_failure("format_corename failed, aborting core"); - goto close_fail; - } + do_coredump(&cn, &cprm, &argv, &argc, binfmt); - switch (cn.core_type) { - case COREDUMP_FILE: - if (!coredump_file(&cn, &cprm, binfmt)) - goto close_fail; - break; - case COREDUMP_PIPE: - if (!coredump_pipe(&cn, &cprm, argv, argc)) - goto close_fail; - break; - case COREDUMP_SOCK_REQ: - fallthrough; - case COREDUMP_SOCK: - if (!coredump_socket(&cn, &cprm)) - goto close_fail; - break; - default: - WARN_ON_ONCE(true); - goto close_fail; - } - - /* Don't even generate the coredump. */ - if (cn.mask & COREDUMP_REJECT) - goto close_fail; - - /* get us an unshared descriptor table; almost always a no-op */ - /* The cell spufs coredump code reads the file descriptor tables */ - if (unshare_files()) - goto close_fail; - - if ((cn.mask & COREDUMP_KERNEL) && !coredump_write(&cn, &cprm, binfmt)) - goto close_fail; - - coredump_sock_shutdown(cprm.file); - - /* Let the parent know that a coredump was generated. */ - if (cn.mask & COREDUMP_USERSPACE) - cn.core_dumped = true; - - /* - * When core_pipe_limit is set we wait for the coredump server - * or usermodehelper to finish before exiting so it can e.g., - * inspect /proc/. 
- */ - if (cn.mask & COREDUMP_WAIT) { - switch (cn.core_type) { - case COREDUMP_PIPE: - wait_for_dump_helpers(cprm.file); - break; - case COREDUMP_SOCK_REQ: - fallthrough; - case COREDUMP_SOCK: - coredump_sock_wait(cprm.file); - break; - default: - break; - } - } - -close_fail: revert_creds(old_cred); coredump_cleanup(&cn, &cprm); return; From 8ed3473c5a8b356c8af950a29d5620be337c3cab Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:35 +0100 Subject: [PATCH 33/36] coredump: use prepare credential guard Use the prepare credential guard for allocating a new set of credentials. Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-9-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 79c681f1d647..5424a6c4e360 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1155,7 +1155,6 @@ static void do_coredump(struct core_name *cn, struct coredump_params *cprm, void vfs_coredump(const kernel_siginfo_t *siginfo) { - struct cred *cred __free(put_cred) = NULL; size_t *argv __free(kfree) = NULL; struct core_state core_state; struct core_name cn; @@ -1183,7 +1182,7 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (coredump_skip(&cprm, binfmt)) return; - cred = prepare_creds(); + CLASS(prepare_creds, cred)(); if (!cred) return; /* From 545985dd3701988c95cba9a8f895631de2039b21 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:36 +0100 Subject: [PATCH 34/36] coredump: use override credential guard Use override credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-10-b447b82f2c9b@kernel.org Signed-off-by: Christian Brauner --- fs/coredump.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/fs/coredump.c b/fs/coredump.c index 5424a6c4e360..fe4099e0530b 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -1160,7 +1160,6 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) struct core_name cn; const struct mm_struct *mm = current->mm; const struct linux_binfmt *binfmt = mm->binfmt; - const struct cred *old_cred; int argc = 0; struct coredump_params cprm = { .siginfo = siginfo, @@ -1197,11 +1196,8 @@ void vfs_coredump(const kernel_siginfo_t *siginfo) if (coredump_wait(siginfo->si_signo, &core_state) < 0) return; - old_cred = override_creds(cred); - - do_coredump(&cn, &cprm, &argv, &argc, binfmt); - - revert_creds(old_cred); + scoped_with_creds(cred) + do_coredump(&cn, &cprm, &argv, &argc, binfmt); coredump_cleanup(&cn, &cprm); return; } From 2ed6a34de9851dcd4db8441a33882b168261be88 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:37 +0100 Subject: [PATCH 35/36] trace: use prepare credential guard Use the prepare credential guard for allocating a new set of credentials. 
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-11-b447b82f2c9b@kernel.org Acked-by: Steven Rostedt (Google) Signed-off-by: Christian Brauner --- kernel/trace/trace_events_user.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index c428dafe7496..28c62149eec5 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1451,10 +1451,8 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) { int ret; const struct cred *old_cred; - struct cred *cred; - - cred = prepare_creds(); + CLASS(prepare_creds, cred)(); if (!cred) return -ENOMEM; @@ -1477,7 +1475,6 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) ret = trace_remove_event_call(&user->call); revert_creds(old_cred); - put_cred(cred); return ret; } From 06765b6efc463ce4d3c0c80a3cc2c888dc902dfa Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 15:57:38 +0100 Subject: [PATCH 36/36] trace: use override credential guard Use override credential guards for scoped credential override with automatic restoration on scope exit. 
Link: https://patch.msgid.link/20251103-work-creds-guards-prepare_creds-v1-12-b447b82f2c9b@kernel.org Acked-by: Steven Rostedt (Google) Signed-off-by: Christian Brauner --- kernel/trace/trace_events_user.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/kernel/trace/trace_events_user.c b/kernel/trace/trace_events_user.c index 28c62149eec5..b15854c75d4f 100644 --- a/kernel/trace/trace_events_user.c +++ b/kernel/trace/trace_events_user.c @@ -1449,9 +1449,6 @@ static struct trace_event_functions user_event_funcs = { static int user_event_set_call_visible(struct user_event *user, bool visible) { - int ret; - const struct cred *old_cred; - CLASS(prepare_creds, cred)(); if (!cred) return -ENOMEM; @@ -1467,16 +1464,12 @@ static int user_event_set_call_visible(struct user_event *user, bool visible) */ cred->fsuid = GLOBAL_ROOT_UID; - old_cred = override_creds(cred); + scoped_with_creds(cred) { + if (visible) + return trace_add_event_call(&user->call); - if (visible) - ret = trace_add_event_call(&user->call); - else - ret = trace_remove_event_call(&user->call); - - revert_creds(old_cred); - - return ret; + return trace_remove_event_call(&user->call); + } } static int destroy_user_event(struct user_event *user)