diff --git a/include/linux/rseq.h b/include/linux/rseq.h
index c6267f70c746..ab91b1e6bb4a 100644
--- a/include/linux/rseq.h
+++ b/include/linux/rseq.h
@@ -9,22 +9,22 @@ void __rseq_handle_notify_resume(struct ksignal *sig, struct pt_regs *regs);
 static inline void rseq_handle_notify_resume(struct pt_regs *regs)
 {
-	if (current->rseq)
+	if (current->rseq.event.has_rseq)
 		__rseq_handle_notify_resume(NULL, regs);
 }
 
 static inline void rseq_signal_deliver(struct ksignal *ksig,
				       struct pt_regs *regs)
 {
-	if (current->rseq) {
-		current->rseq_event_pending = true;
+	if (current->rseq.event.has_rseq) {
+		current->rseq.event.sched_switch = true;
 		__rseq_handle_notify_resume(ksig, regs);
 	}
 }
 
 static inline void rseq_sched_switch_event(struct task_struct *t)
 {
-	if (t->rseq) {
-		t->rseq_event_pending = true;
+	if (t->rseq.event.has_rseq) {
+		t->rseq.event.sched_switch = true;
 		set_tsk_thread_flag(t, TIF_NOTIFY_RESUME);
 	}
 }
@@ -32,8 +32,9 @@ static inline void rseq_sched_switch_event(struct task_struct *t)
 static __always_inline void rseq_exit_to_user_mode(void)
 {
 	if (IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
-		if (WARN_ON_ONCE(current->rseq && current->rseq_event_pending))
-			current->rseq_event_pending = false;
+		if (WARN_ON_ONCE(current->rseq.event.has_rseq &&
+				 current->rseq.event.events))
+			current->rseq.event.events = 0;
 	}
 }
 
@@ -49,35 +50,30 @@ static __always_inline void rseq_exit_to_user_mode(void)
  */
 static inline void rseq_virt_userspace_exit(void)
 {
-	if (current->rseq_event_pending)
+	if (current->rseq.event.sched_switch)
 		set_tsk_thread_flag(current, TIF_NOTIFY_RESUME);
 }
 
+static inline void rseq_reset(struct task_struct *t)
+{
+	memset(&t->rseq, 0, sizeof(t->rseq));
+}
+
+static inline void rseq_execve(struct task_struct *t)
+{
+	rseq_reset(t);
+}
+
 /*
  * If parent process has a registered restartable sequences area, the
  * child inherits. Unregister rseq for a clone with CLONE_VM set.
  */
 static inline void rseq_fork(struct task_struct *t, u64 clone_flags)
 {
-	if (clone_flags & CLONE_VM) {
-		t->rseq = NULL;
-		t->rseq_len = 0;
-		t->rseq_sig = 0;
-		t->rseq_event_pending = false;
-	} else {
+	if (clone_flags & CLONE_VM)
+		rseq_reset(t);
+	else
 		t->rseq = current->rseq;
-		t->rseq_len = current->rseq_len;
-		t->rseq_sig = current->rseq_sig;
-		t->rseq_event_pending = current->rseq_event_pending;
-	}
-}
-
-static inline void rseq_execve(struct task_struct *t)
-{
-	t->rseq = NULL;
-	t->rseq_len = 0;
-	t->rseq_sig = 0;
-	t->rseq_event_pending = false;
 }
 
 #else /* CONFIG_RSEQ */
diff --git a/include/linux/rseq_types.h b/include/linux/rseq_types.h
new file mode 100644
index 000000000000..f7a60c8eddc9
--- /dev/null
+++ b/include/linux/rseq_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_RSEQ_TYPES_H
+#define _LINUX_RSEQ_TYPES_H
+
+#include <linux/types.h>
+
+#ifdef CONFIG_RSEQ
+struct rseq;
+
+/**
+ * struct rseq_event - Storage for rseq related event management
+ * @all:		Compound to initialize and clear the data efficiently
+ * @events:		Compound to access events with a single load/store
+ * @sched_switch:	True if the task was scheduled out
+ * @has_rseq:		True if the task has a rseq pointer installed
+ */
+struct rseq_event {
+	union {
+		u32		all;
+		struct {
+			union {
+				u16	events;
+				struct {
+					u8	sched_switch;
+				};
+			};
+
+			u8		has_rseq;
+		};
+	};
+};
+
+/**
+ * struct rseq_data - Storage for all rseq related data
+ * @usrptr:	Pointer to the registered user space RSEQ memory
+ * @len:	Length of the RSEQ region
+ * @sig:	Signature of critical section abort IPs
+ * @event:	Storage for event management
+ */
+struct rseq_data {
+	struct rseq __user	*usrptr;
+	u32			len;
+	u32			sig;
+	struct rseq_event	event;
+};
+
+#else /* CONFIG_RSEQ */
+struct rseq_data { };
+#endif /* !CONFIG_RSEQ */
+
+#endif
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6627c527c2c7..15627769409d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -41,6 +41,7 @@
 #include
 #include
 #include
+#include <linux/rseq_types.h>
 #include
 #include
 #include
@@ -1406,16 +1407,8 @@ struct task_struct {
 	unsigned long			numa_pages_migrated;
 #endif /* CONFIG_NUMA_BALANCING */
 
-#ifdef CONFIG_RSEQ
-	struct rseq __user		*rseq;
-	u32				rseq_len;
-	u32				rseq_sig;
-	/*
-	 * RmW on rseq_event_pending must be performed atomically
-	 * with respect to preemption.
-	 */
-	bool				rseq_event_pending;
-# ifdef CONFIG_DEBUG_RSEQ
+	struct rseq_data		rseq;
+#ifdef CONFIG_DEBUG_RSEQ
 	/*
 	 * This is a place holder to save a copy of the rseq fields for
 	 * validation of read-only fields. The struct rseq has a
 	 * directly. Reserve a size large enough for the known fields.
 	 */
 	char				rseq_fields[sizeof(struct rseq)];
-# endif
 #endif
 
 #ifdef CONFIG_SCHED_MM_CID
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 75a84efad40f..392ec2f75f01 100644
--- a/kernel/ptrace.c
+++ b/kernel/ptrace.c
@@ -793,9 +793,9 @@ static long ptrace_get_rseq_configuration(struct task_struct *task,
					   unsigned long size, void __user *data)
 {
 	struct ptrace_rseq_configuration conf = {
-		.rseq_abi_pointer = (u64)(uintptr_t)task->rseq,
-		.rseq_abi_size = task->rseq_len,
-		.signature = task->rseq_sig,
+		.rseq_abi_pointer = (u64)(uintptr_t)task->rseq.usrptr,
+		.rseq_abi_size = task->rseq.len,
+		.signature = task->rseq.sig,
 		.flags = 0,
 	};
 
diff --git a/kernel/rseq.c b/kernel/rseq.c
index 81dddafa2f2e..aae62661e6bb 100644
--- a/kernel/rseq.c
+++ b/kernel/rseq.c
@@ -103,13 +103,13 @@ static int rseq_validate_ro_fields(struct task_struct *t)
					      DEFAULT_RATELIMIT_INTERVAL,
					      DEFAULT_RATELIMIT_BURST);
 	u32 cpu_id_start, cpu_id, node_id, mm_cid;
-	struct rseq __user *rseq = t->rseq;
+	struct rseq __user *rseq = t->rseq.usrptr;
 
 	/*
 	 * Validate fields which are required to be read-only by
 	 * user-space.
 	 */
-	if (!user_read_access_begin(rseq, t->rseq_len))
+	if (!user_read_access_begin(rseq, t->rseq.len))
 		goto efault;
 	unsafe_get_user(cpu_id_start, &rseq->cpu_id_start, efault_end);
 	unsafe_get_user(cpu_id, &rseq->cpu_id, efault_end);
@@ -147,10 +147,10 @@ static int rseq_validate_ro_fields(struct task_struct *t)
  * Update an rseq field and its in-kernel copy in lock-step to keep a coherent
  * state.
  */
-#define rseq_unsafe_put_user(t, value, field, error_label)		\
-	do {								\
-		unsafe_put_user(value, &t->rseq->field, error_label);	\
-		rseq_kernel_fields(t)->field = value;			\
+#define rseq_unsafe_put_user(t, value, field, error_label)			\
+	do {									\
+		unsafe_put_user(value, &t->rseq.usrptr->field, error_label);	\
+		rseq_kernel_fields(t)->field = value;				\
 	} while (0)
 
 #else
@@ -160,12 +160,12 @@ static int rseq_validate_ro_fields(struct task_struct *t)
 }
 
 #define rseq_unsafe_put_user(t, value, field, error_label) \
-	unsafe_put_user(value, &t->rseq->field, error_label)
+	unsafe_put_user(value, &t->rseq.usrptr->field, error_label)
 
 #endif
 
 static int rseq_update_cpu_node_id(struct task_struct *t)
 {
-	struct rseq __user *rseq = t->rseq;
+	struct rseq __user *rseq = t->rseq.usrptr;
 	u32 cpu_id = raw_smp_processor_id();
 	u32 node_id = cpu_to_node(cpu_id);
 	u32 mm_cid = task_mm_cid(t);
@@ -176,7 +176,7 @@ static int rseq_update_cpu_node_id(struct task_struct *t)
 	if (rseq_validate_ro_fields(t))
 		goto efault;
 	WARN_ON_ONCE((int) mm_cid < 0);
-	if (!user_write_access_begin(rseq, t->rseq_len))
+	if (!user_write_access_begin(rseq, t->rseq.len))
 		goto efault;
 
 	rseq_unsafe_put_user(t, cpu_id, cpu_id_start, efault_end);
@@ -201,7 +201,7 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
 {
-	struct rseq __user *rseq = t->rseq;
+	struct rseq __user *rseq = t->rseq.usrptr;
 	u32 cpu_id_start = 0, cpu_id = RSEQ_CPU_ID_UNINITIALIZED, node_id = 0,
 	    mm_cid = 0;
 
@@ -211,7 +211,7 @@ static int rseq_reset_rseq_cpu_node_id(struct task_struct *t)
 	if (rseq_validate_ro_fields(t))
 		goto efault;
 
-	if (!user_write_access_begin(rseq, t->rseq_len))
+	if (!user_write_access_begin(rseq, t->rseq.len))
 		goto efault;
 
 	/*
@@ -272,7 +272,7 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 	u32 sig;
 	int ret;
 
-	ret = rseq_get_rseq_cs_ptr_val(t->rseq, &ptr);
+	ret = rseq_get_rseq_cs_ptr_val(t->rseq.usrptr, &ptr);
 	if (ret)
 		return ret;
 
@@ -305,10 +305,10 @@ static int rseq_get_rseq_cs(struct task_struct *t, struct rseq_cs *rseq_cs)
 	if (ret)
 		return ret;
 
-	if (current->rseq_sig != sig) {
-		printk_ratelimited(KERN_WARNING
-			"Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
-			sig, current->rseq_sig, current->pid, usig);
+	if (current->rseq.sig != sig) {
+		printk_ratelimited(KERN_WARNING
+			"Possible attack attempt. Unexpected rseq signature 0x%x, expecting 0x%x (pid=%d, addr=%p).\n",
+			sig, current->rseq.sig, current->pid, usig);
 		return -EINVAL;
 	}
 	return 0;
@@ -338,7 +338,7 @@ static int rseq_check_flags(struct task_struct *t, u32 cs_flags)
 		return -EINVAL;
 
 	/* Get thread flags. */
-	ret = get_user(flags, &t->rseq->flags);
+	ret = get_user(flags, &t->rseq.usrptr->flags);
 	if (ret)
 		return ret;
@@ -392,13 +392,13 @@ static int rseq_ip_fixup(struct pt_regs *regs, bool abort)
 	 * Clear the rseq_cs pointer and return.
 	 */
 	if (!in_rseq_cs(ip, &rseq_cs))
-		return clear_rseq_cs(t->rseq);
+		return clear_rseq_cs(t->rseq.usrptr);
 	ret = rseq_check_flags(t, rseq_cs.flags);
 	if (ret < 0)
 		return ret;
 	if (!abort)
 		return 0;
-	ret = clear_rseq_cs(t->rseq);
+	ret = clear_rseq_cs(t->rseq.usrptr);
 	if (ret)
 		return ret;
 	trace_rseq_ip_fixup(ip, rseq_cs.start_ip, rseq_cs.post_commit_offset,
@@ -460,8 +460,8 @@ void __rseq_handle_notify_resume(struct ksignal *ksig, struct pt_regs *regs)
 	 * inconsistencies.
 	 */
 	scoped_guard(RSEQ_EVENT_GUARD) {
-		event = t->rseq_event_pending;
-		t->rseq_event_pending = false;
+		event = t->rseq.event.sched_switch;
+		t->rseq.event.sched_switch = false;
 	}
 
 	if (!IS_ENABLED(CONFIG_DEBUG_RSEQ) && !event)
@@ -492,7 +492,7 @@ void rseq_syscall(struct pt_regs *regs)
 	struct task_struct *t = current;
 	struct rseq_cs rseq_cs;
 
-	if (!t->rseq)
+	if (!t->rseq.usrptr)
 		return;
 	if (rseq_get_rseq_cs(t, &rseq_cs) || in_rseq_cs(ip, &rseq_cs))
 		force_sig(SIGSEGV);
@@ -511,33 +511,31 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
 		if (flags & ~RSEQ_FLAG_UNREGISTER)
 			return -EINVAL;
 		/* Unregister rseq for current thread. */
-		if (current->rseq != rseq || !current->rseq)
+		if (current->rseq.usrptr != rseq || !current->rseq.usrptr)
 			return -EINVAL;
-		if (rseq_len != current->rseq_len)
+		if (rseq_len != current->rseq.len)
 			return -EINVAL;
-		if (current->rseq_sig != sig)
+		if (current->rseq.sig != sig)
 			return -EPERM;
 		ret = rseq_reset_rseq_cpu_node_id(current);
 		if (ret)
 			return ret;
-		current->rseq = NULL;
-		current->rseq_sig = 0;
-		current->rseq_len = 0;
+		rseq_reset(current);
 		return 0;
 	}
 
 	if (unlikely(flags))
 		return -EINVAL;
 
-	if (current->rseq) {
+	if (current->rseq.usrptr) {
 		/*
 		 * If rseq is already registered, check whether
 		 * the provided address differs from the prior
 		 * one.
 		 */
-		if (current->rseq != rseq || rseq_len != current->rseq_len)
+		if (current->rseq.usrptr != rseq || rseq_len != current->rseq.len)
 			return -EINVAL;
-		if (current->rseq_sig != sig)
+		if (current->rseq.sig != sig)
 			return -EPERM;
 		/* Already registered. */
 		return -EBUSY;
@@ -586,15 +584,16 @@ SYSCALL_DEFINE4(rseq, struct rseq __user *, rseq, u32, rseq_len, int, flags, u32, sig)
 	 * Activate the registration by setting the rseq area address, length
 	 * and signature in the task struct.
 	 */
-	current->rseq = rseq;
-	current->rseq_len = rseq_len;
-	current->rseq_sig = sig;
+	current->rseq.usrptr = rseq;
+	current->rseq.len = rseq_len;
+	current->rseq.sig = sig;
 
 	/*
 	 * If rseq was previously inactive, and has just been
 	 * registered, ensure the cpu_id_start and cpu_id fields
 	 * are updated before returning to user-space.
	 */
+	current->rseq.event.has_rseq = true;
 	rseq_sched_switch_event(current);
 
 	return 0;
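
Note on the nested union in struct rseq_event: the following is a minimal stand-alone userspace sketch, not kernel code. The struct name and the stdint types are substitutions made up purely for illustration; it only mirrors the layout assumption the patch relies on, namely that sched_switch aliases into events, has_rseq sits next to events, and both are covered by all. Clearing events therefore keeps the registration state, while clearing all (or memset(), as rseq_reset() does) wipes everything with a single store.

/* Illustration only: userspace mirror of the rseq_event layout. */
#include <assert.h>
#include <stdint.h>
#include <string.h>

struct rseq_event_sketch {
	union {
		uint32_t all;
		struct {
			union {
				uint16_t events;
				struct {
					uint8_t sched_switch;
				};
			};
			uint8_t has_rseq;
		};
	};
};

int main(void)
{
	struct rseq_event_sketch ev;

	memset(&ev, 0, sizeof(ev));	/* rseq_reset() analogue */

	ev.has_rseq = 1;		/* registration installed a rseq pointer */
	ev.sched_switch = 1;		/* task was scheduled out */
	assert(ev.events != 0 && ev.has_rseq == 1);

	ev.events = 0;			/* clear pending events only */
	assert(ev.sched_switch == 0 && ev.has_rseq == 1);

	ev.all = 0;			/* clear events and registration at once */
	assert(ev.has_rseq == 0);
	return 0;
}

With that layout, the debug check in rseq_exit_to_user_mode() can test and clear all pending events through the events compound without touching has_rseq, and fork/execve can reset the whole rseq_data via the single memset() in rseq_reset(), as the hunks above do.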