landlock: Multithreading support for landlock_restrict_self()
Introduce the LANDLOCK_RESTRICT_SELF_TSYNC flag. With this flag, a given
Landlock ruleset is applied to all threads of the calling process, instead
of only the current one.

Without this flag, multithreaded userspace programs currently resort to
using the nptl(7)/libpsx hack for multithreaded policy enforcement, which
is also used by libcap and for setuid(2). Using this userspace-based
scheme, the threads of a process enforce the same Landlock policy, but the
resulting Landlock domains are still separate. The domains being separate
causes multiple problems:

* When using Landlock's "scoped" access rights, the domain identity is
  used to determine whether an operation is permitted. As a result, when
  using LANDLOCK_SCOPE_SIGNAL, signaling between sibling threads stops
  working. This is a problem for programming languages and frameworks
  which are inherently multithreaded (e.g. Go).

* In audit logging, the domains of separate threads in a process will get
  logged with different domain IDs, even when they are based on the same
  ruleset FD, which might confuse users.

Cc: Andrew G. Morgan <morgan@kernel.org>
Cc: John Johansen <john.johansen@canonical.com>
Cc: Paul Moore <paul@paul-moore.com>
Suggested-by: Jann Horn <jannh@google.com>
Signed-off-by: Günther Noack <gnoack@google.com>
Link: https://lore.kernel.org/r/20251127115136.3064948-2-gnoack@google.com
[mic: Fix restrict_self_flags test, clean up Makefile, align comments,
reduce local variable scope, add missing includes]
Closes: https://github.com/landlock-lsm/linux/issues/2
Signed-off-by: Mickaël Salaün <mic@digikod.net>
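For illustration (not part of the commit), here is a minimal userspace
sketch of how a multithreaded program might opt in to the new flag. The
#ifndef fallback value of LANDLOCK_RESTRICT_SELF_TSYNC is an assumption
for toolchains whose UAPI headers predate this patch, and error handling
is abbreviated:

/* Hypothetical usage sketch (not part of this commit). */
#include <linux/landlock.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef LANDLOCK_RESTRICT_SELF_TSYNC
/* Assumed flag value; the real definition comes from the UAPI header. */
#define LANDLOCK_RESTRICT_SELF_TSYNC (1U << 3)
#endif

static int restrict_whole_process(void)
{
        const struct landlock_ruleset_attr attr = {
                .scoped = LANDLOCK_SCOPE_SIGNAL,
        };
        const int ruleset_fd = (int)syscall(SYS_landlock_create_ruleset,
                                            &attr, sizeof(attr), 0);
        int err;

        if (ruleset_fd < 0)
                return -1;

        /* Required unless the task has CAP_SYS_ADMIN in its namespace. */
        if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) {
                close(ruleset_fd);
                return -1;
        }

        /*
         * With TSYNC, all sibling threads end up in the same Landlock
         * domain, so LANDLOCK_SCOPE_SIGNAL keeps intra-process signaling
         * working.
         */
        err = (int)syscall(SYS_landlock_restrict_self, ruleset_fd,
                           LANDLOCK_RESTRICT_SELF_TSYNC);
        close(ruleset_fd);
        return err;
}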
This commit is contained in:
parent 24d479d26b
commit 42fc7e6543

8 changed files with 654 additions and 34 deletions
security/landlock/Makefile
@@ -1,7 +1,14 @@
 obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o
 
-landlock-y := setup.o syscalls.o object.o ruleset.o \
-	cred.o task.o fs.o
+landlock-y := \
+	setup.o \
+	syscalls.o \
+	object.o \
+	ruleset.o \
+	cred.o \
+	task.o \
+	fs.o \
+	tsync.o
 
 landlock-$(CONFIG_INET) += net.o
 
security/landlock/cred.h
@@ -26,6 +26,8 @@
  * This structure is packed to minimize the size of struct
  * landlock_file_security. However, it is always aligned in the LSM cred blob,
  * see lsm_set_blob_size().
+ *
+ * When updating this, also update landlock_cred_copy() if needed.
  */
 struct landlock_cred_security {
 	/**
@@ -65,6 +67,16 @@ landlock_cred(const struct cred *cred)
 	return cred->security + landlock_blob_sizes.lbs_cred;
 }
 
+static inline void landlock_cred_copy(struct landlock_cred_security *dst,
+				      const struct landlock_cred_security *src)
+{
+	landlock_put_ruleset(dst->domain);
+
+	*dst = *src;
+
+	landlock_get_ruleset(src->domain);
+}
+
 static inline struct landlock_ruleset *landlock_get_current_domain(void)
 {
 	return landlock_cred(current_cred())->domain;
@ -31,7 +31,7 @@
|
|||
#define LANDLOCK_MASK_SCOPE ((LANDLOCK_LAST_SCOPE << 1) - 1)
|
||||
#define LANDLOCK_NUM_SCOPE __const_hweight64(LANDLOCK_MASK_SCOPE)
|
||||
|
||||
#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF
|
||||
#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_TSYNC
|
||||
#define LANDLOCK_MASK_RESTRICT_SELF ((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1)
|
||||
|
||||
/* clang-format on */
|
||||
|
|
|
|||
|
|
security/landlock/syscalls.c
@@ -36,6 +36,7 @@
 #include "net.h"
 #include "ruleset.h"
 #include "setup.h"
+#include "tsync.h"
 
 static bool is_initialized(void)
 {
@@ -161,7 +162,7 @@ static const struct file_operations ruleset_fops = {
  * Documentation/userspace-api/landlock.rst should be updated to reflect the
  * UAPI change.
  */
-const int landlock_abi_version = 7;
+const int landlock_abi_version = 8;
 
 /**
  * sys_landlock_create_ruleset - Create a new ruleset
@@ -454,9 +455,10 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
  * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF
  * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON
  * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF
+ * - %LANDLOCK_RESTRICT_SELF_TSYNC
  *
- * This system call enables to enforce a Landlock ruleset on the current
- * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
+ * This system call enforces a Landlock ruleset on the current thread.
+ * Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its
  * namespace or is running with no_new_privs. This avoids scenarios where
  * unprivileged tasks can affect the behavior of privileged children.
  *
@@ -478,8 +480,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd,
 SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 		flags)
 {
-	struct landlock_ruleset *new_dom,
-		*ruleset __free(landlock_put_ruleset) = NULL;
+	struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL;
 	struct cred *new_cred;
 	struct landlock_cred_security *new_llcred;
 	bool __maybe_unused log_same_exec, log_new_exec, log_subdomains,
@@ -538,33 +539,43 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32,
 	 * We could optimize this case by not calling commit_creds() if this flag
 	 * was already set, but it is not worth the complexity.
 	 */
-	if (!ruleset)
-		return commit_creds(new_cred);
-
-	/*
-	 * There is no possible race condition while copying and manipulating
-	 * the current credentials because they are dedicated per thread.
-	 */
-	new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset);
-	if (IS_ERR(new_dom)) {
-		abort_creds(new_cred);
-		return PTR_ERR(new_dom);
-	}
+	if (ruleset) {
+		/*
+		 * There is no possible race condition while copying and
+		 * manipulating the current credentials because they are
+		 * dedicated per thread.
+		 */
+		struct landlock_ruleset *const new_dom =
+			landlock_merge_ruleset(new_llcred->domain, ruleset);
+		if (IS_ERR(new_dom)) {
+			abort_creds(new_cred);
+			return PTR_ERR(new_dom);
+		}
 
 #ifdef CONFIG_AUDIT
-	new_dom->hierarchy->log_same_exec = log_same_exec;
-	new_dom->hierarchy->log_new_exec = log_new_exec;
-	if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
-		new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
+		new_dom->hierarchy->log_same_exec = log_same_exec;
+		new_dom->hierarchy->log_new_exec = log_new_exec;
+		if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains)
+			new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED;
 #endif /* CONFIG_AUDIT */
 
-	/* Replaces the old (prepared) domain. */
-	landlock_put_ruleset(new_llcred->domain);
-	new_llcred->domain = new_dom;
+		/* Replaces the old (prepared) domain. */
+		landlock_put_ruleset(new_llcred->domain);
+		new_llcred->domain = new_dom;
 
 #ifdef CONFIG_AUDIT
-	new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
+		new_llcred->domain_exec |= BIT(new_dom->num_layers - 1);
 #endif /* CONFIG_AUDIT */
+	}
+
+	if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) {
+		const int err = landlock_restrict_sibling_threads(
+			current_cred(), new_cred);
+		if (err) {
+			abort_creds(new_cred);
+			return err;
+		}
+	}
 
 	return commit_creds(new_cred);
 }
security/landlock/tsync.c (new file)
@@ -0,0 +1,561 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock - Cross-thread ruleset enforcement
+ *
+ * Copyright © 2025 Google LLC
+ */
+
+#include <linux/atomic.h>
+#include <linux/cleanup.h>
+#include <linux/completion.h>
+#include <linux/cred.h>
+#include <linux/errno.h>
+#include <linux/overflow.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/sched/signal.h>
+#include <linux/sched/task.h>
+#include <linux/slab.h>
+#include <linux/task_work.h>
+
+#include "cred.h"
+#include "tsync.h"
+
+/*
+ * Shared state between multiple threads which are enforcing Landlock
+ * rulesets in lockstep with each other.
+ */
+struct tsync_shared_context {
+	/* The old and tentative new creds of the calling thread. */
+	const struct cred *old_cred;
+	const struct cred *new_cred;
+
+	/* True if sibling tasks need to set the no_new_privs flag. */
+	bool set_no_new_privs;
+
+	/* An error encountered in the preparation step, or 0. */
+	atomic_t preparation_error;
+
+	/*
+	 * Barrier after the preparation step in restrict_one_thread().
+	 * The calling thread waits for completion.
+	 *
+	 * Re-initialized on every round of looking for newly spawned threads.
+	 */
+	atomic_t num_preparing;
+	struct completion all_prepared;
+
+	/* Sibling threads wait for completion. */
+	struct completion ready_to_commit;
+
+	/*
+	 * Barrier after the commit step (used by the syscall implementation
+	 * to wait for completion).
+	 */
+	atomic_t num_unfinished;
+	struct completion all_finished;
+};
+
+struct tsync_work {
+	struct callback_head work;
+	struct task_struct *task;
+	struct tsync_shared_context *shared_ctx;
+};
+
+/*
+ * restrict_one_thread - update a thread's Landlock domain in lockstep with
+ * the other threads in the same process
+ *
+ * When this is run, the same function gets run in all other threads of the
+ * same process (except for the thread which called landlock_restrict_self()).
+ * The concurrently running invocations of restrict_one_thread() coordinate
+ * through the shared ctx object to do their work in lockstep, implementing
+ * all-or-nothing semantics for enforcing the new Landlock domain.
+ *
+ * Afterwards, depending on the presence of an error, all threads either
+ * commit or abort the prepared credentials. The commit operation cannot
+ * fail anymore.
+ */
+static void restrict_one_thread(struct tsync_shared_context *ctx)
+{
+	int err;
+	struct cred *cred = NULL;
+
+	if (current_cred() == ctx->old_cred) {
+		/*
+		 * Switch out old_cred with new_cred, if possible.
+		 *
+		 * In the common case, where all threads initially point to
+		 * the same struct cred, this optimization avoids creating
+		 * separate redundant credentials objects for each, which
+		 * would all have the same contents.
+		 *
+		 * Note: We are intentionally dropping the const qualifier
+		 * here, because it is required by commit_creds() and
+		 * abort_creds().
+		 */
+		cred = (struct cred *)get_cred(ctx->new_cred);
+	} else {
+		/* Else, prepare new creds and populate them. */
+		cred = prepare_creds();
+
+		if (!cred) {
+			atomic_set(&ctx->preparation_error, -ENOMEM);
+
+			/*
+			 * Even on error, we need to adhere to the protocol
+			 * and coordinate with concurrently running
+			 * invocations.
+			 */
+			if (atomic_dec_return(&ctx->num_preparing) == 0)
+				complete_all(&ctx->all_prepared);
+
+			goto out;
+		}
+
+		landlock_cred_copy(landlock_cred(cred),
+				   landlock_cred(ctx->new_cred));
+	}
+
+	/*
+	 * Barrier: Wait until all threads are done preparing.
+	 * After this point, we can have no more failures.
+	 */
+	if (atomic_dec_return(&ctx->num_preparing) == 0)
+		complete_all(&ctx->all_prepared);
+
+	/*
+	 * Wait for the signal from the calling thread that it is safe to
+	 * read the preparation error now and we are ready to commit (or
+	 * abort).
+	 */
+	wait_for_completion(&ctx->ready_to_commit);
+
+	/* Abort the commit if any of the other threads had an error. */
+	err = atomic_read(&ctx->preparation_error);
+	if (err) {
+		abort_creds(cred);
+		goto out;
+	}
+
+	/*
+	 * Make sure that all sibling tasks fulfill the no_new_privs
+	 * prerequisite. (This is in line with seccomp's
+	 * SECCOMP_FILTER_FLAG_TSYNC logic in kernel/seccomp.c)
+	 */
+	if (ctx->set_no_new_privs)
+		task_set_no_new_privs(current);
+
+	commit_creds(cred);
+
+out:
+	/* Notify the calling thread once all threads are done. */
+	if (atomic_dec_return(&ctx->num_unfinished) == 0)
+		complete_all(&ctx->all_finished);
+}
+
+/*
+ * restrict_one_thread_callback - task_work callback for restricting a thread
+ *
+ * Calls restrict_one_thread() with the struct tsync_shared_context.
+ */
+static void restrict_one_thread_callback(struct callback_head *work)
+{
+	struct tsync_work *ctx = container_of(work, struct tsync_work, work);
+
+	restrict_one_thread(ctx->shared_ctx);
+}
+
+/*
+ * struct tsync_works - a growable array of per-task contexts
+ *
+ * The zero-initialized struct represents the empty array.
+ */
+struct tsync_works {
+	struct tsync_work **works;
+	size_t size;
+	size_t capacity;
+};
+
+/*
+ * tsync_works_provide - provides a preallocated tsync_work for the given task
+ *
+ * This also stores a task pointer in the context and increments the
+ * reference count of the task.
+ *
+ * This function may fail in the case where we did not preallocate
+ * sufficient capacity. This can legitimately happen if new threads get
+ * started after we grew the capacity.
+ *
+ * Returns:
+ * A pointer to the preallocated context struct, with task filled in.
+ * NULL, if we ran out of preallocated context structs.
+ */
+static struct tsync_work *tsync_works_provide(struct tsync_works *s,
+					      struct task_struct *task)
+{
+	struct tsync_work *ctx;
+
+	if (s->size >= s->capacity)
+		return NULL;
+
+	ctx = s->works[s->size];
+	s->size++;
+
+	ctx->task = get_task_struct(task);
+	return ctx;
+}
+
+/*
+ * tsync_works_grow_by - preallocates space for n more contexts in s
+ *
+ * On a successful return, the subsequent n calls to tsync_works_provide()
+ * are guaranteed to succeed. (size + n <= capacity)
+ *
+ * Returns:
+ * -ENOMEM if the (re)allocation fails or only partially succeeds.
+ * 0 if the allocation succeeds or no reallocation was needed.
+ */
+static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags)
+{
+	size_t i;
+	size_t new_capacity;
+	struct tsync_work **works;
+	struct tsync_work *work;
+
+	if (check_add_overflow(s->size, n, &new_capacity))
+		return -EOVERFLOW;
+
+	/* No need to reallocate if s already has sufficient capacity. */
+	if (new_capacity <= s->capacity)
+		return 0;
+
+	works = krealloc_array(s->works, new_capacity, sizeof(s->works[0]),
+			       flags);
+	if (!works)
+		return -ENOMEM;
+
+	s->works = works;
+
+	for (i = s->capacity; i < new_capacity; i++) {
+		work = kzalloc(sizeof(*work), flags);
+		if (!work) {
+			/*
+			 * Leave the object in a consistent state,
+			 * but return an error.
+			 */
+			s->capacity = i;
+			return -ENOMEM;
+		}
+		s->works[i] = work;
+	}
+	s->capacity = new_capacity;
+	return 0;
+}
+
+/*
+ * tsync_works_contains_task - checks for the presence of task in s
+ */
+static bool tsync_works_contains_task(const struct tsync_works *s,
+				      struct task_struct *task)
+{
+	size_t i;
+
+	for (i = 0; i < s->size; i++)
+		if (s->works[i]->task == task)
+			return true;
+	return false;
+}
+
+/*
+ * tsync_works_release - frees memory held by s and drops all task references
+ *
+ * This does not free s itself, only the data structures held by it.
+ */
+static void tsync_works_release(struct tsync_works *s)
+{
+	size_t i;
+
+	for (i = 0; i < s->size; i++) {
+		if (!s->works[i]->task)
+			continue;
+
+		put_task_struct(s->works[i]->task);
+	}
+
+	for (i = 0; i < s->capacity; i++)
+		kfree(s->works[i]);
+	kfree(s->works);
+	s->works = NULL;
+	s->size = 0;
+	s->capacity = 0;
+}
+
+/*
+ * count_additional_threads - counts the sibling threads that are not in works
+ */
+static size_t count_additional_threads(const struct tsync_works *works)
+{
+	struct task_struct *thread, *caller;
+	size_t n = 0;
+
+	caller = current;
+
+	guard(rcu)();
+
+	for_each_thread(caller, thread) {
+		/* Skip current, since it is initiating the sync. */
+		if (thread == caller)
+			continue;
+
+		/* Skip exited threads. */
+		if (thread->flags & PF_EXITING)
+			continue;
+
+		/* Skip threads that we have already seen. */
+		if (tsync_works_contains_task(works, thread))
+			continue;
+
+		n++;
+	}
+	return n;
+}
+
+/*
+ * schedule_task_work - adds task_work for all eligible sibling threads
+ * which have not been scheduled yet
+ *
+ * For each added task_work, atomically increments shared_ctx->num_preparing
+ * and shared_ctx->num_unfinished.
+ *
+ * Returns:
+ * true, if at least one eligible sibling thread was found.
+ */
+static bool schedule_task_work(struct tsync_works *works,
+			       struct tsync_shared_context *shared_ctx)
+{
+	int err;
+	struct task_struct *thread, *caller;
+	struct tsync_work *ctx;
+	bool found_more_threads = false;
+
+	caller = current;
+
+	guard(rcu)();
+
+	for_each_thread(caller, thread) {
+		/* Skip current, since it is initiating the sync. */
+		if (thread == caller)
+			continue;
+
+		/* Skip exited threads. */
+		if (thread->flags & PF_EXITING)
+			continue;
+
+		/* Skip threads that we already looked at. */
+		if (tsync_works_contains_task(works, thread))
+			continue;
+
+		/*
+		 * We found a sibling thread that is not doing its task_work
+		 * yet, and which might spawn new threads before our task
+		 * work runs, so we need at least one more round in the
+		 * outer loop.
+		 */
+		found_more_threads = true;
+
+		ctx = tsync_works_provide(works, thread);
+		if (!ctx) {
+			/*
+			 * We ran out of preallocated contexts -- we need to
+			 * try again with this thread at a later time!
+			 * found_more_threads is already true at this point.
+			 */
+			break;
+		}
+
+		ctx->shared_ctx = shared_ctx;
+
+		atomic_inc(&shared_ctx->num_preparing);
+		atomic_inc(&shared_ctx->num_unfinished);
+
+		init_task_work(&ctx->work, restrict_one_thread_callback);
+		err = task_work_add(thread, &ctx->work, TWA_SIGNAL);
+		if (err) {
+			/*
+			 * task_work_add() only fails if the task is about to
+			 * exit. We checked that earlier, but it can happen
+			 * as a race. Resume without setting an error, as the
+			 * task is probably gone in the next loop iteration.
+			 * For consistency, remove the task from ctx so that
+			 * it does not look like we handed it a task_work.
+			 */
+			put_task_struct(ctx->task);
+			ctx->task = NULL;
+
+			atomic_dec(&shared_ctx->num_preparing);
+			atomic_dec(&shared_ctx->num_unfinished);
+		}
+	}
+
+	return found_more_threads;
+}
+
+/*
+ * cancel_tsync_works - cancels all task works where it is possible
+ *
+ * Task works can be canceled as long as they are still queued and have not
+ * started running. If they get canceled, we decrement
+ * shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two
+ * completions if needed, as if the task was never scheduled.
+ */
+static void cancel_tsync_works(struct tsync_works *works,
+			       struct tsync_shared_context *shared_ctx)
+{
+	size_t i;
+
+	for (i = 0; i < works->size; i++) {
+		if (!task_work_cancel(works->works[i]->task,
+				      &works->works[i]->work))
+			continue;
+
+		/* After dequeueing, act as if the task work had executed. */
+
+		if (atomic_dec_return(&shared_ctx->num_preparing) == 0)
+			complete_all(&shared_ctx->all_prepared);
+
+		if (atomic_dec_return(&shared_ctx->num_unfinished) == 0)
+			complete_all(&shared_ctx->all_finished);
+	}
+}
+
+/*
+ * landlock_restrict_sibling_threads - enables a Landlock policy for all
+ * sibling threads
+ */
+int landlock_restrict_sibling_threads(const struct cred *old_cred,
+				      const struct cred *new_cred)
+{
+	int err;
+	struct tsync_shared_context shared_ctx;
+	struct tsync_works works = {};
+	size_t newly_discovered_threads;
+	bool found_more_threads;
+
+	atomic_set(&shared_ctx.preparation_error, 0);
+	init_completion(&shared_ctx.all_prepared);
+	init_completion(&shared_ctx.ready_to_commit);
+	atomic_set(&shared_ctx.num_unfinished, 1);
+	init_completion(&shared_ctx.all_finished);
+	shared_ctx.old_cred = old_cred;
+	shared_ctx.new_cred = new_cred;
+	shared_ctx.set_no_new_privs = task_no_new_privs(current);
+
+	/*
+	 * We schedule a pseudo-signal task_work for each of the calling
+	 * task's sibling threads. In the task work, each thread:
+	 *
+	 * 1) runs prepare_creds() and writes back the error to
+	 *    shared_ctx.preparation_error, if needed.
+	 *
+	 * 2) signals that it's done with prepare_creds() to the calling
+	 *    task (completion "all_prepared").
+	 *
+	 * 3) waits for the completion "ready_to_commit". This is sent by
+	 *    the calling task after ensuring that all sibling threads are
+	 *    done with the "preparation" stage.
+	 *
+	 *    After this barrier is reached, it's safe to read
+	 *    shared_ctx.preparation_error.
+	 *
+	 * 4) reads shared_ctx.preparation_error and then either does
+	 *    commit_creds() or abort_creds().
+	 *
+	 * 5) signals that it's done altogether (barrier synchronization
+	 *    "all_finished").
+	 *
+	 * Unlike seccomp, which modifies sibling tasks directly, we do not
+	 * need to acquire the cred_guard_mutex and sighand->siglock:
+	 *
+	 * - As in our case, all threads are themselves exchanging their own
+	 *   struct cred through the credentials API, no locks are needed
+	 *   for that.
+	 * - Our for_each_thread() loops are protected by RCU.
+	 * - We do not acquire a lock to keep the list of sibling threads
+	 *   stable between our for_each_thread() loops. If the list of
+	 *   available sibling threads changes between these
+	 *   for_each_thread() loops, we make up for that by continuing to
+	 *   look for threads until they are all discovered and have entered
+	 *   their task_work, where they are unable to spawn new threads.
+	 */
+	do {
+		/* In RCU read-lock, count the threads we need. */
+		newly_discovered_threads = count_additional_threads(&works);
+
+		if (newly_discovered_threads == 0)
+			break; /* done */
+
+		err = tsync_works_grow_by(&works, newly_discovered_threads,
+					  GFP_KERNEL_ACCOUNT);
+		if (err) {
+			atomic_set(&shared_ctx.preparation_error, err);
+			break;
+		}
+
+		/*
+		 * The "all_prepared" barrier is local to the loop body and
+		 * its use of for_each_thread(). We can reset it on each loop
+		 * iteration because all previous loop iterations are done
+		 * with it already.
+		 *
+		 * num_preparing is initialized to 1 so that the counter can
+		 * not go to 0 and mark the completion as done before all
+		 * task works are registered. We decrement it at the end of
+		 * the loop body.
+		 */
+		atomic_set(&shared_ctx.num_preparing, 1);
+		reinit_completion(&shared_ctx.all_prepared);
+
+		/*
+		 * In RCU read-lock, schedule task work on newly discovered
+		 * sibling tasks.
+		 */
+		found_more_threads = schedule_task_work(&works, &shared_ctx);
+
+		/*
+		 * Decrement num_preparing for current, to undo that we
+		 * initialized it to 1 a few lines above.
+		 */
+		if (atomic_dec_return(&shared_ctx.num_preparing) > 0) {
+			if (wait_for_completion_interruptible(
+				    &shared_ctx.all_prepared)) {
+				/*
+				 * In case of interruption, we need to retry
+				 * the system call.
+				 */
+				atomic_set(&shared_ctx.preparation_error,
+					   -ERESTARTNOINTR);
+
+				/*
+				 * Cancel task works for tasks that did not
+				 * start running yet, and decrement
+				 * num_preparing and num_unfinished
+				 * accordingly.
+				 */
+				cancel_tsync_works(&works, &shared_ctx);
+
+				/*
+				 * The remaining task works have started
+				 * running, so waiting for their completion
+				 * will finish.
+				 */
+				wait_for_completion(&shared_ctx.all_prepared);
+			}
+		}
+	} while (found_more_threads &&
+		 !atomic_read(&shared_ctx.preparation_error));
+
+	/*
+	 * We now have all sibling threads blocking and in "prepared" state
+	 * in the task work. Ask all threads to commit.
+	 */
+	complete_all(&shared_ctx.ready_to_commit);
+
+	/*
+	 * Decrement num_unfinished for current, to undo that we initialized
+	 * it to 1 at the beginning.
+	 */
+	if (atomic_dec_return(&shared_ctx.num_unfinished) > 0)
+		wait_for_completion(&shared_ctx.all_finished);
+
+	tsync_works_release(&works);
+
+	return atomic_read(&shared_ctx.preparation_error);
+}
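As a reading aid (not part of the commit): the lockstep protocol above
boils down to countdown counters paired with completions, where the last
participant to finish a phase wakes everyone waiting on it. A minimal
userspace analogue of this idiom, using pthreads and C11 atomics with
hypothetical names:

/* Userspace analogue of the atomic_dec_return()/complete_all() idiom. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>

struct completion {		/* stand-in for the kernel's struct completion */
        pthread_mutex_t lock;
        pthread_cond_t cond;
        bool done;
};

#define COMPLETION_INITIALIZER \
        { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, false }

static void complete_all(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        c->done = true;
        pthread_cond_broadcast(&c->cond);
        pthread_mutex_unlock(&c->lock);
}

static void wait_for_completion(struct completion *c)
{
        pthread_mutex_lock(&c->lock);
        while (!c->done)
                pthread_cond_wait(&c->cond, &c->lock);
        pthread_mutex_unlock(&c->lock);
}

/*
 * Each participant calls this after its "prepare" step; whoever brings
 * the counter to zero wakes every waiter. The kernel code does the same
 * with atomic_dec_return() followed by complete_all().
 */
static void finish_prepare(atomic_int *num_preparing,
                           struct completion *all_prepared)
{
        if (atomic_fetch_sub_explicit(num_preparing, 1,
                                      memory_order_acq_rel) == 1)
                complete_all(all_prepared);
}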
security/landlock/tsync.h (new file)
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Landlock - Cross-thread ruleset enforcement
+ *
+ * Copyright © 2025 Google LLC
+ */
+
+#ifndef _SECURITY_LANDLOCK_TSYNC_H
+#define _SECURITY_LANDLOCK_TSYNC_H
+
+#include <linux/cred.h>
+
+int landlock_restrict_sibling_threads(const struct cred *old_cred,
+				      const struct cred *new_cred);
+
+#endif /* _SECURITY_LANDLOCK_TSYNC_H */
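To close, a hypothetical end-to-end sketch (not from this commit or its
selftests) of the behavior difference the commit message describes: after
restricting with LANDLOCK_RESTRICT_SELF_TSYNC, sibling threads share one
domain, so LANDLOCK_SCOPE_SIGNAL no longer blocks signaling between them.
The flag's fallback value is an assumption, error handling is abbreviated,
and the program needs -pthread:

#include <linux/landlock.h>
#include <pthread.h>
#include <signal.h>
#include <stdio.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

#ifndef LANDLOCK_RESTRICT_SELF_TSYNC
#define LANDLOCK_RESTRICT_SELF_TSYNC (1U << 3)	/* assumed flag value */
#endif

static void *worker(void *arg)
{
        sigset_t *set = arg;
        int sig;

        sigwait(set, &sig);	/* block until a sibling signals us */
        return NULL;
}

int main(void)
{
        const struct landlock_ruleset_attr attr = {
                .scoped = LANDLOCK_SCOPE_SIGNAL,
        };
        sigset_t set;
        pthread_t t;
        int fd;

        /* Block SIGUSR1 in both threads; the worker consumes it via sigwait(). */
        sigemptyset(&set);
        sigaddset(&set, SIGUSR1);
        pthread_sigmask(SIG_BLOCK, &set, NULL);
        pthread_create(&t, NULL, worker, &set);

        fd = (int)syscall(SYS_landlock_create_ruleset, &attr, sizeof(attr), 0);
        prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
        if (syscall(SYS_landlock_restrict_self, fd,
                    LANDLOCK_RESTRICT_SELF_TSYNC))
                perror("landlock_restrict_self");
        close(fd);

        /* Both threads are now in the same domain, so this is permitted. */
        if (pthread_kill(t, SIGUSR1) == 0)
                puts("sibling signaling still works");
        pthread_join(t, NULL);
        return 0;
}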