From 42fc7e6543f6d17d2cf9ed3e5021f103a3d11182 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 27 Nov 2025 12:51:34 +0100 Subject: [PATCH 01/12] landlock: Multithreading support for landlock_restrict_self() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce the LANDLOCK_RESTRICT_SELF_TSYNC flag. With this flag, a given Landlock ruleset is applied to all threads of the calling process, instead of only the current one. Without this flag, multithreaded userspace programs currently resort to using the nptl(7)/libpsx hack for multithreaded policy enforcement, which is also used by libcap and for setuid(2). Using this userspace-based scheme, the threads of a process enforce the same Landlock policy, but the resulting Landlock domains are still separate. The domains being separate causes multiple problems: * When using Landlock's "scoped" access rights, the domain identity is used to determine whether an operation is permitted. As a result, when using LANDLOCK_SCOPE_SIGNAL, signaling between sibling threads stops working. This is a problem for programming languages and frameworks which are inherently multithreaded (e.g. Go). * In audit logging, the domains of separate threads in a process will get logged with different domain IDs, even when they are based on the same ruleset FD, which might confuse users. Cc: Andrew G. 
Morgan Cc: John Johansen Cc: Paul Moore Suggested-by: Jann Horn Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20251127115136.3064948-2-gnoack@google.com [mic: Fix restrict_self_flags test, clean up Makefile, allign comments, reduce local variable scope, add missing includes] Closes: https://github.com/landlock-lsm/linux/issues/2 Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 13 + security/landlock/Makefile | 11 +- security/landlock/cred.h | 12 + security/landlock/limits.h | 2 +- security/landlock/syscalls.c | 69 ++- security/landlock/tsync.c | 561 +++++++++++++++++++ security/landlock/tsync.h | 16 + tools/testing/selftests/landlock/base_test.c | 4 +- 8 files changed, 654 insertions(+), 34 deletions(-) create mode 100644 security/landlock/tsync.c create mode 100644 security/landlock/tsync.h diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index 75fd7f5e6cc3..d5081ab4e5ef 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -117,11 +117,24 @@ struct landlock_ruleset_attr { * future nested domains, not the one being created. It can also be used * with a @ruleset_fd value of -1 to mute subdomain logs without creating a * domain. + * + * The following flag supports policy enforcement in multithreaded processes: + * + * %LANDLOCK_RESTRICT_SELF_TSYNC + * Applies the new Landlock configuration atomically to all threads of the + * current process, including the Landlock domain and logging + * configuration. This overrides the Landlock configuration of sibling + * threads, irrespective of previously established Landlock domains and + * logging configurations on these threads. + * + * If the calling thread is running with no_new_privs, this operation + * enables no_new_privs on the sibling threads as well. 
*/ /* clang-format off */ #define LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF (1U << 0) #define LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON (1U << 1) #define LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF (1U << 2) +#define LANDLOCK_RESTRICT_SELF_TSYNC (1U << 3) /* clang-format on */ /** diff --git a/security/landlock/Makefile b/security/landlock/Makefile index 3160c2bdac1d..ffa7646d99f3 100644 --- a/security/landlock/Makefile +++ b/security/landlock/Makefile @@ -1,7 +1,14 @@ obj-$(CONFIG_SECURITY_LANDLOCK) := landlock.o -landlock-y := setup.o syscalls.o object.o ruleset.o \ - cred.o task.o fs.o +landlock-y := \ + setup.o \ + syscalls.o \ + object.o \ + ruleset.o \ + cred.o \ + task.o \ + fs.o \ + tsync.o landlock-$(CONFIG_INET) += net.o diff --git a/security/landlock/cred.h b/security/landlock/cred.h index c82fe63ec598..c10a06727eb1 100644 --- a/security/landlock/cred.h +++ b/security/landlock/cred.h @@ -26,6 +26,8 @@ * This structure is packed to minimize the size of struct * landlock_file_security. However, it is always aligned in the LSM cred blob, * see lsm_set_blob_size(). + * + * When updating this, also update landlock_cred_copy() if needed. 
*/ struct landlock_cred_security { /** @@ -65,6 +67,16 @@ landlock_cred(const struct cred *cred) return cred->security + landlock_blob_sizes.lbs_cred; } +static inline void landlock_cred_copy(struct landlock_cred_security *dst, + const struct landlock_cred_security *src) +{ + landlock_put_ruleset(dst->domain); + + *dst = *src; + + landlock_get_ruleset(src->domain); +} + static inline struct landlock_ruleset *landlock_get_current_domain(void) { return landlock_cred(current_cred())->domain; diff --git a/security/landlock/limits.h b/security/landlock/limits.h index 65b5ff051674..eb584f47288d 100644 --- a/security/landlock/limits.h +++ b/security/landlock/limits.h @@ -31,7 +31,7 @@ #define LANDLOCK_MASK_SCOPE ((LANDLOCK_LAST_SCOPE << 1) - 1) #define LANDLOCK_NUM_SCOPE __const_hweight64(LANDLOCK_MASK_SCOPE) -#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF +#define LANDLOCK_LAST_RESTRICT_SELF LANDLOCK_RESTRICT_SELF_TSYNC #define LANDLOCK_MASK_RESTRICT_SELF ((LANDLOCK_LAST_RESTRICT_SELF << 1) - 1) /* clang-format on */ diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 0116e9f93ffe..3e4e99deb7f9 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -36,6 +36,7 @@ #include "net.h" #include "ruleset.h" #include "setup.h" +#include "tsync.h" static bool is_initialized(void) { @@ -161,7 +162,7 @@ static const struct file_operations ruleset_fops = { * Documentation/userspace-api/landlock.rst should be updated to reflect the * UAPI change. 
*/ -const int landlock_abi_version = 7; +const int landlock_abi_version = 8; /** * sys_landlock_create_ruleset - Create a new ruleset @@ -454,9 +455,10 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, * - %LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF * - %LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON * - %LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF + * - %LANDLOCK_RESTRICT_SELF_TSYNC * - * This system call enables to enforce a Landlock ruleset on the current - * thread. Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its + * This system call enforces a Landlock ruleset on the current thread. + * Enforcing a ruleset requires that the task has %CAP_SYS_ADMIN in its * namespace or is running with no_new_privs. This avoids scenarios where * unprivileged tasks can affect the behavior of privileged children. * @@ -478,8 +480,7 @@ SYSCALL_DEFINE4(landlock_add_rule, const int, ruleset_fd, SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, flags) { - struct landlock_ruleset *new_dom, - *ruleset __free(landlock_put_ruleset) = NULL; + struct landlock_ruleset *ruleset __free(landlock_put_ruleset) = NULL; struct cred *new_cred; struct landlock_cred_security *new_llcred; bool __maybe_unused log_same_exec, log_new_exec, log_subdomains, @@ -538,33 +539,43 @@ SYSCALL_DEFINE2(landlock_restrict_self, const int, ruleset_fd, const __u32, * We could optimize this case by not calling commit_creds() if this flag * was already set, but it is not worth the complexity. */ - if (!ruleset) - return commit_creds(new_cred); + if (ruleset) { + /* + * There is no possible race condition while copying and + * manipulating the current credentials because they are + * dedicated per thread. 
+ */ + struct landlock_ruleset *const new_dom = + landlock_merge_ruleset(new_llcred->domain, ruleset); + if (IS_ERR(new_dom)) { + abort_creds(new_cred); + return PTR_ERR(new_dom); + } - /* - * There is no possible race condition while copying and manipulating - * the current credentials because they are dedicated per thread. - */ - new_dom = landlock_merge_ruleset(new_llcred->domain, ruleset); - if (IS_ERR(new_dom)) { - abort_creds(new_cred); - return PTR_ERR(new_dom); +#ifdef CONFIG_AUDIT + new_dom->hierarchy->log_same_exec = log_same_exec; + new_dom->hierarchy->log_new_exec = log_new_exec; + if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains) + new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED; +#endif /* CONFIG_AUDIT */ + + /* Replaces the old (prepared) domain. */ + landlock_put_ruleset(new_llcred->domain); + new_llcred->domain = new_dom; + +#ifdef CONFIG_AUDIT + new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); +#endif /* CONFIG_AUDIT */ } -#ifdef CONFIG_AUDIT - new_dom->hierarchy->log_same_exec = log_same_exec; - new_dom->hierarchy->log_new_exec = log_new_exec; - if ((!log_same_exec && !log_new_exec) || !prev_log_subdomains) - new_dom->hierarchy->log_status = LANDLOCK_LOG_DISABLED; -#endif /* CONFIG_AUDIT */ - - /* Replaces the old (prepared) domain. 
*/ - landlock_put_ruleset(new_llcred->domain); - new_llcred->domain = new_dom; - -#ifdef CONFIG_AUDIT - new_llcred->domain_exec |= BIT(new_dom->num_layers - 1); -#endif /* CONFIG_AUDIT */ + if (flags & LANDLOCK_RESTRICT_SELF_TSYNC) { + const int err = landlock_restrict_sibling_threads( + current_cred(), new_cred); + if (err) { + abort_creds(new_cred); + return err; + } + } return commit_creds(new_cred); } diff --git a/security/landlock/tsync.c b/security/landlock/tsync.c new file mode 100644 index 000000000000..0d2b9c646030 --- /dev/null +++ b/security/landlock/tsync.c @@ -0,0 +1,561 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Landlock - Cross-thread ruleset enforcement + * + * Copyright © 2025 Google LLC + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cred.h" +#include "tsync.h" + +/* + * Shared state between multiple threads which are enforcing Landlock rulesets + * in lockstep with each other. + */ +struct tsync_shared_context { + /* The old and tentative new creds of the calling thread. */ + const struct cred *old_cred; + const struct cred *new_cred; + + /* True if sibling tasks need to set the no_new_privs flag. */ + bool set_no_new_privs; + + /* An error encountered in preparation step, or 0. */ + atomic_t preparation_error; + + /* + * Barrier after preparation step in restrict_one_thread. + * The calling thread waits for completion. + * + * Re-initialized on every round of looking for newly spawned threads. + */ + atomic_t num_preparing; + struct completion all_prepared; + + /* Sibling threads wait for completion. */ + struct completion ready_to_commit; + + /* + * Barrier after commit step (used by syscall impl to wait for + * completion). 
+ */ + atomic_t num_unfinished; + struct completion all_finished; +}; + +struct tsync_work { + struct callback_head work; + struct task_struct *task; + struct tsync_shared_context *shared_ctx; +}; + +/* + * restrict_one_thread - update a thread's Landlock domain in lockstep with the + * other threads in the same process + * + * When this is run, the same function gets run in all other threads in the same + * process (except for the calling thread which called landlock_restrict_self). + * The concurrently running invocations of restrict_one_thread coordinate + * through the shared ctx object to do their work in lockstep to implement + * all-or-nothing semantics for enforcing the new Landlock domain. + * + * Afterwards, depending on the presence of an error, all threads either commit + * or abort the prepared credentials. The commit operation can not fail any + * more. + */ +static void restrict_one_thread(struct tsync_shared_context *ctx) +{ + int err; + struct cred *cred = NULL; + + if (current_cred() == ctx->old_cred) { + /* + * Switch out old_cred with new_cred, if possible. + * + * In the common case, where all threads initially point to the same + * struct cred, this optimization avoids creating separate redundant + * credentials objects for each, which would all have the same contents. + * + * Note: We are intentionally dropping the const qualifier here, because + * it is required by commit_creds() and abort_creds(). + */ + cred = (struct cred *)get_cred(ctx->new_cred); + } else { + /* Else, prepare new creds and populate them. */ + cred = prepare_creds(); + + if (!cred) { + atomic_set(&ctx->preparation_error, -ENOMEM); + + /* + * Even on error, we need to adhere to the protocol and coordinate + * with concurrently running invocations. 
+ */ + if (atomic_dec_return(&ctx->num_preparing) == 0) + complete_all(&ctx->all_prepared); + + goto out; + } + + landlock_cred_copy(landlock_cred(cred), + landlock_cred(ctx->new_cred)); + } + + /* + * Barrier: Wait until all threads are done preparing. + * After this point, we can have no more failures. + */ + if (atomic_dec_return(&ctx->num_preparing) == 0) + complete_all(&ctx->all_prepared); + + /* + * Wait for signal from calling thread that it's safe to read the + * preparation error now and we are ready to commit (or abort). + */ + wait_for_completion(&ctx->ready_to_commit); + + /* Abort the commit if any of the other threads had an error. */ + err = atomic_read(&ctx->preparation_error); + if (err) { + abort_creds(cred); + goto out; + } + + /* + * Make sure that all sibling tasks fulfill the no_new_privs prerequisite. + * (This is in line with Seccomp's SECCOMP_FILTER_FLAG_TSYNC logic in + * kernel/seccomp.c) + */ + if (ctx->set_no_new_privs) + task_set_no_new_privs(current); + + commit_creds(cred); + +out: + /* Notify the calling thread once all threads are done */ + if (atomic_dec_return(&ctx->num_unfinished) == 0) + complete_all(&ctx->all_finished); +} + +/* + * restrict_one_thread_callback - task_work callback for restricting a thread + * + * Calls restrict_one_thread with the struct landlock_shared_tsync_context. + */ +static void restrict_one_thread_callback(struct callback_head *work) +{ + struct tsync_work *ctx = container_of(work, struct tsync_work, work); + + restrict_one_thread(ctx->shared_ctx); +} + +/* + * struct tsync_works - a growable array of per-task contexts + * + * The zero-initialized struct represents the empty array. + */ +struct tsync_works { + struct tsync_work **works; + size_t size; + size_t capacity; +}; + +/* + * tsync_works_provide - provides a preallocated tsync_work for the given task + * + * This also stores a task pointer in the context and increments the reference + * count of the task. 
+ * + * This function may fail in the case where we did not preallocate sufficient + * capacity. This can legitimately happen if new threads get started after we + * grew the capacity. + * + * Returns: + * A pointer to the preallocated context struct, with task filled in. + * + * NULL, if we ran out of preallocated context structs. + */ +static struct tsync_work *tsync_works_provide(struct tsync_works *s, + struct task_struct *task) +{ + struct tsync_work *ctx; + + if (s->size >= s->capacity) + return NULL; + + ctx = s->works[s->size]; + s->size++; + + ctx->task = get_task_struct(task); + return ctx; +} + +/* + * tsync_works_grow_by - preallocates space for n more contexts in s + * + * On a successful return, the subsequent n calls to tsync_works_provide() are + * guaranteed to succeed. (size + n <= capacity) + * + * Returns: + * -ENOMEM if the (re)allocation fails + + * 0 if the allocation succeeds, partially succeeds, or no reallocation + * was needed + */ +static int tsync_works_grow_by(struct tsync_works *s, size_t n, gfp_t flags) +{ + size_t i; + size_t new_capacity; + struct tsync_work **works; + struct tsync_work *work; + + if (check_add_overflow(s->size, n, &new_capacity)) + return -EOVERFLOW; + + /* No need to reallocate if s already has sufficient capacity. */ + if (new_capacity <= s->capacity) + return 0; + + works = krealloc_array(s->works, new_capacity, sizeof(s->works[0]), + flags); + if (!works) + return -ENOMEM; + + s->works = works; + + for (i = s->capacity; i < new_capacity; i++) { + work = kzalloc(sizeof(*work), flags); + if (!work) { + /* + * Leave the object in a consistent state, + * but return an error. 
+ */ + s->capacity = i; + return -ENOMEM; + } + s->works[i] = work; + } + s->capacity = new_capacity; + return 0; +} + +/* + * tsync_works_contains - checks for presence of task in s + */ +static bool tsync_works_contains_task(const struct tsync_works *s, + struct task_struct *task) +{ + size_t i; + + for (i = 0; i < s->size; i++) + if (s->works[i]->task == task) + return true; + return false; +} + +/* + * tsync_works_release - frees memory held by s and drops all task references + * + * This does not free s itself, only the data structures held by it. + */ +static void tsync_works_release(struct tsync_works *s) +{ + size_t i; + + for (i = 0; i < s->size; i++) { + if (!s->works[i]->task) + continue; + + put_task_struct(s->works[i]->task); + } + + for (i = 0; i < s->capacity; i++) + kfree(s->works[i]); + kfree(s->works); + s->works = NULL; + s->size = 0; + s->capacity = 0; +} + +/* + * count_additional_threads - counts the sibling threads that are not in works + */ +static size_t count_additional_threads(const struct tsync_works *works) +{ + struct task_struct *thread, *caller; + size_t n = 0; + + caller = current; + + guard(rcu)(); + + for_each_thread(caller, thread) { + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + /* Skip exited threads. */ + if (thread->flags & PF_EXITING) + continue; + + /* Skip threads that we have already seen. */ + if (tsync_works_contains_task(works, thread)) + continue; + + n++; + } + return n; +} + +/* + * schedule_task_work - adds task_work for all eligible sibling threads + * which have not been scheduled yet + * + * For each added task_work, atomically increments shared_ctx->num_preparing and + * shared_ctx->num_unfinished. 
+ * + * Returns: + * true, if at least one eligible sibling thread was found + */ +static bool schedule_task_work(struct tsync_works *works, + struct tsync_shared_context *shared_ctx) +{ + int err; + struct task_struct *thread, *caller; + struct tsync_work *ctx; + bool found_more_threads = false; + + caller = current; + + guard(rcu)(); + + for_each_thread(caller, thread) { + /* Skip current, since it is initiating the sync. */ + if (thread == caller) + continue; + + /* Skip exited threads. */ + if (thread->flags & PF_EXITING) + continue; + + /* Skip threads that we already looked at. */ + if (tsync_works_contains_task(works, thread)) + continue; + + /* + * We found a sibling thread that is not doing its task_work yet, and + * which might spawn new threads before our task work runs, so we need + * at least one more round in the outer loop. + */ + found_more_threads = true; + + ctx = tsync_works_provide(works, thread); + if (!ctx) { + /* + * We ran out of preallocated contexts -- we need to try again with + * this thread at a later time! + * found_more_threads is already true at this point. + */ + break; + } + + ctx->shared_ctx = shared_ctx; + + atomic_inc(&shared_ctx->num_preparing); + atomic_inc(&shared_ctx->num_unfinished); + + init_task_work(&ctx->work, restrict_one_thread_callback); + err = task_work_add(thread, &ctx->work, TWA_SIGNAL); + if (err) { + /* + * task_work_add() only fails if the task is about to exit. We + * checked that earlier, but it can happen as a race. Resume + * without setting an error, as the task is probably gone in the + * next loop iteration. For consistency, remove the task from ctx + * so that it does not look like we handed it a task_work. 
+ */ + put_task_struct(ctx->task); + ctx->task = NULL; + + atomic_dec(&shared_ctx->num_preparing); + atomic_dec(&shared_ctx->num_unfinished); + } + } + + return found_more_threads; +} + +/* + * cancel_tsync_works - cancel all task works where it is possible + * + * Task works can be canceled as long as they are still queued and have not + * started running. If they get canceled, we decrement + * shared_ctx->num_preparing and shared_ctx->num_unfinished and mark the two + * completions if needed, as if the task was never scheduled. + */ +static void cancel_tsync_works(struct tsync_works *works, + struct tsync_shared_context *shared_ctx) +{ + int i; + + for (i = 0; i < works->size; i++) { + if (!task_work_cancel(works->works[i]->task, + &works->works[i]->work)) + continue; + + /* After dequeueing, act as if the task work had executed. */ + + if (atomic_dec_return(&shared_ctx->num_preparing) == 0) + complete_all(&shared_ctx->all_prepared); + + if (atomic_dec_return(&shared_ctx->num_unfinished) == 0) + complete_all(&shared_ctx->all_finished); + } +} + +/* + * restrict_sibling_threads - enables a Landlock policy for all sibling threads + */ +int landlock_restrict_sibling_threads(const struct cred *old_cred, + const struct cred *new_cred) +{ + int err; + struct tsync_shared_context shared_ctx; + struct tsync_works works = {}; + size_t newly_discovered_threads; + bool found_more_threads; + + atomic_set(&shared_ctx.preparation_error, 0); + init_completion(&shared_ctx.all_prepared); + init_completion(&shared_ctx.ready_to_commit); + atomic_set(&shared_ctx.num_unfinished, 1); + init_completion(&shared_ctx.all_finished); + shared_ctx.old_cred = old_cred; + shared_ctx.new_cred = new_cred; + shared_ctx.set_no_new_privs = task_no_new_privs(current); + + /* + * We schedule a pseudo-signal task_work for each of the calling task's + * sibling threads. In the task work, each thread: + * + * 1) runs prepare_creds() and writes back the error to + * shared_ctx.preparation_error, if needed. 
+ * + * 2) signals that it's done with prepare_creds() to the calling task. + * (completion "all_prepared"). + * + * 3) waits for the completion "ready_to_commit". This is sent by the + * calling task after ensuring that all sibling threads have done + * with the "preparation" stage. + * + * After this barrier is reached, it's safe to read + * shared_ctx.preparation_error. + * + * 4) reads shared_ctx.preparation_error and then either does commit_creds() + * or abort_creds(). + * + * 5) signals that it's done altogether (barrier synchronization + * "all_finished") + * + * Unlike seccomp, which modifies sibling tasks directly, we do not need to + * acquire the cred_guard_mutex and sighand->siglock: + * + * - As in our case, all threads are themselves exchanging their own struct + * cred through the credentials API, no locks are needed for that. + * - Our for_each_thread() loops are protected by RCU. + * - We do not acquire a lock to keep the list of sibling threads stable + * between our for_each_thread loops. If the list of available sibling + * threads changes between these for_each_thread loops, we make up for + * that by continuing to look for threads until they are all discovered + * and have entered their task_work, where they are unable to spawn new + * threads. + */ + do { + /* In RCU read-lock, count the threads we need. */ + newly_discovered_threads = count_additional_threads(&works); + + if (newly_discovered_threads == 0) + break; /* done */ + + err = tsync_works_grow_by(&works, newly_discovered_threads, + GFP_KERNEL_ACCOUNT); + if (err) { + atomic_set(&shared_ctx.preparation_error, err); + break; + } + + /* + * The "all_prepared" barrier is used locally to the loop body, this use + * of for_each_thread(). We can reset it on each loop iteration because + * all previous loop iterations are done with it already. 
+ * + * num_preparing is initialized to 1 so that the counter can not go to 0 + * and mark the completion as done before all task works are registered. + * We decrement it at the end of the loop body. + */ + atomic_set(&shared_ctx.num_preparing, 1); + reinit_completion(&shared_ctx.all_prepared); + + /* + * In RCU read-lock, schedule task work on newly discovered sibling + * tasks. + */ + found_more_threads = schedule_task_work(&works, &shared_ctx); + + /* + * Decrement num_preparing for current, to undo that we initialized it + * to 1 a few lines above. + */ + if (atomic_dec_return(&shared_ctx.num_preparing) > 0) { + if (wait_for_completion_interruptible( + &shared_ctx.all_prepared)) { + /* In case of interruption, we need to retry the system call. */ + atomic_set(&shared_ctx.preparation_error, + -ERESTARTNOINTR); + + /* + * Cancel task works for tasks that did not start running yet, + * and decrement all_prepared and num_unfinished accordingly. + */ + cancel_tsync_works(&works, &shared_ctx); + + /* + * The remaining task works have started running, so waiting for + * their completion will finish. + */ + wait_for_completion(&shared_ctx.all_prepared); + } + } + } while (found_more_threads && + !atomic_read(&shared_ctx.preparation_error)); + + /* + * We now have all sibling threads blocking and in "prepared" state in the + * task work. Ask all threads to commit. + */ + complete_all(&shared_ctx.ready_to_commit); + + /* + * Decrement num_unfinished for current, to undo that we initialized it to 1 + * at the beginning. 
+ */ + if (atomic_dec_return(&shared_ctx.num_unfinished) > 0) + wait_for_completion(&shared_ctx.all_finished); + + tsync_works_release(&works); + + return atomic_read(&shared_ctx.preparation_error); +} diff --git a/security/landlock/tsync.h b/security/landlock/tsync.h new file mode 100644 index 000000000000..ef86bb61c2f6 --- /dev/null +++ b/security/landlock/tsync.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Landlock - Cross-thread ruleset enforcement + * + * Copyright © 2025 Google LLC + */ + +#ifndef _SECURITY_LANDLOCK_TSYNC_H +#define _SECURITY_LANDLOCK_TSYNC_H + +#include + +int landlock_restrict_sibling_threads(const struct cred *old_cred, + const struct cred *new_cred); + +#endif /* _SECURITY_LANDLOCK_TSYNC_H */ diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index 7b69002239d7..fdbb672009ac 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -76,7 +76,7 @@ TEST(abi_version) const struct landlock_ruleset_attr ruleset_attr = { .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE, }; - ASSERT_EQ(7, landlock_create_ruleset(NULL, 0, + ASSERT_EQ(8, landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_VERSION)); ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, @@ -306,7 +306,7 @@ TEST(restrict_self_fd_flags) TEST(restrict_self_flags) { - const __u32 last_flag = LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF; + const __u32 last_flag = LANDLOCK_RESTRICT_SELF_TSYNC; /* Tests invalid flag combinations. */ From 50c058e3eafe31a5197d4cffb599f2f5f165d4eb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 27 Nov 2025 12:51:35 +0100 Subject: [PATCH 02/12] selftests/landlock: Add LANDLOCK_RESTRICT_SELF_TSYNC tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Exercise various scenarios where Landlock domains are enforced across all of a process's threads. 
Test coverage for security/landlock is 91.6% of 2130 lines according to LLVM 21. Cc: Andrew G. Morgan Cc: John Johansen Cc: Paul Moore Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20251127115136.3064948-3-gnoack@google.com [mic: Fix subject, use EXPECT_EQ(close()), make helpers static, add test coverage] Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/base_test.c | 4 +- tools/testing/selftests/landlock/tsync_test.c | 161 ++++++++++++++++++ 2 files changed, 163 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/landlock/tsync_test.c diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c index fdbb672009ac..0fea236ef4bd 100644 --- a/tools/testing/selftests/landlock/base_test.c +++ b/tools/testing/selftests/landlock/base_test.c @@ -288,7 +288,7 @@ TEST(restrict_self_fd) EXPECT_EQ(EBADFD, errno); } -TEST(restrict_self_fd_flags) +TEST(restrict_self_fd_logging_flags) { int fd; @@ -304,7 +304,7 @@ TEST(restrict_self_fd_flags) EXPECT_EQ(EBADFD, errno); } -TEST(restrict_self_flags) +TEST(restrict_self_logging_flags) { const __u32 last_flag = LANDLOCK_RESTRICT_SELF_TSYNC; diff --git a/tools/testing/selftests/landlock/tsync_test.c b/tools/testing/selftests/landlock/tsync_test.c new file mode 100644 index 000000000000..37ef0d2270db --- /dev/null +++ b/tools/testing/selftests/landlock/tsync_test.c @@ -0,0 +1,161 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock tests - Enforcing the same restrictions across multiple threads + * + * Copyright © 2025 Günther Noack + */ + +#define _GNU_SOURCE +#include +#include +#include + +#include "common.h" + +/* create_ruleset - Create a simple ruleset FD common to all tests */ +static int create_ruleset(struct __test_metadata *const _metadata) +{ + struct landlock_ruleset_attr ruleset_attr = { + .handled_access_fs = (LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_TRUNCATE), + }; + const int ruleset_fd = + 
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0); + + ASSERT_LE(0, ruleset_fd) + { + TH_LOG("landlock_create_ruleset: %s", strerror(errno)); + } + return ruleset_fd; +} + +TEST(single_threaded_success) +{ + const int ruleset_fd = create_ruleset(_metadata); + + disable_caps(_metadata); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC)); + + EXPECT_EQ(0, close(ruleset_fd)); +} + +static void store_no_new_privs(void *data) +{ + bool *nnp = data; + + if (!nnp) + return; + *nnp = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); +} + +static void *idle(void *data) +{ + pthread_cleanup_push(store_no_new_privs, data); + + while (true) + sleep(1); + + pthread_cleanup_pop(1); +} + +TEST(multi_threaded_success) +{ + pthread_t t1, t2; + bool no_new_privs1, no_new_privs2; + const int ruleset_fd = create_ruleset(_metadata); + + disable_caps(_metadata); + + ASSERT_EQ(0, pthread_create(&t1, NULL, idle, &no_new_privs1)); + ASSERT_EQ(0, pthread_create(&t2, NULL, idle, &no_new_privs2)); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC)); + + ASSERT_EQ(0, pthread_cancel(t1)); + ASSERT_EQ(0, pthread_cancel(t2)); + ASSERT_EQ(0, pthread_join(t1, NULL)); + ASSERT_EQ(0, pthread_join(t2, NULL)); + + /* The no_new_privs flag was implicitly enabled on all threads. */ + EXPECT_TRUE(no_new_privs1); + EXPECT_TRUE(no_new_privs2); + + EXPECT_EQ(0, close(ruleset_fd)); +} + +TEST(multi_threaded_success_despite_diverging_domains) +{ + pthread_t t1, t2; + const int ruleset_fd = create_ruleset(_metadata); + + disable_caps(_metadata); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + + ASSERT_EQ(0, pthread_create(&t1, NULL, idle, NULL)); + ASSERT_EQ(0, pthread_create(&t2, NULL, idle, NULL)); + + /* + * The main thread enforces a ruleset, + * thereby bringing the threads' Landlock domains out of sync. 
+ */ + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0)); + + /* Still, TSYNC succeeds, bringing the threads in sync again. */ + EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC)); + + ASSERT_EQ(0, pthread_cancel(t1)); + ASSERT_EQ(0, pthread_cancel(t2)); + ASSERT_EQ(0, pthread_join(t1, NULL)); + ASSERT_EQ(0, pthread_join(t2, NULL)); + EXPECT_EQ(0, close(ruleset_fd)); +} + +struct thread_restrict_data { + pthread_t t; + int ruleset_fd; + int result; +}; + +static void *thread_restrict(void *data) +{ + struct thread_restrict_data *d = data; + + d->result = landlock_restrict_self(d->ruleset_fd, + LANDLOCK_RESTRICT_SELF_TSYNC); + return NULL; +} + +TEST(competing_enablement) +{ + const int ruleset_fd = create_ruleset(_metadata); + struct thread_restrict_data d[] = { + { .ruleset_fd = ruleset_fd }, + { .ruleset_fd = ruleset_fd }, + }; + + disable_caps(_metadata); + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)); + ASSERT_EQ(0, pthread_create(&d[0].t, NULL, thread_restrict, &d[0])); + ASSERT_EQ(0, pthread_create(&d[1].t, NULL, thread_restrict, &d[1])); + + /* Wait for threads to finish. */ + ASSERT_EQ(0, pthread_join(d[0].t, NULL)); + ASSERT_EQ(0, pthread_join(d[1].t, NULL)); + + /* Expect that both succeeded. */ + EXPECT_EQ(0, d[0].result); + EXPECT_EQ(0, d[1].result); + + EXPECT_EQ(0, close(ruleset_fd)); +} + +TEST_HARNESS_MAIN From 39508405f6e6c8ce8a0f4bf93b344610d9051043 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Thu, 27 Nov 2025 12:51:36 +0100 Subject: [PATCH 03/12] landlock: Document LANDLOCK_RESTRICT_SELF_TSYNC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add documentation for LANDLOCK_RESTRICT_SELF_TSYNC. It does not need to go into the main example, but it has a section in the ABI compatibility notes. In the HTML rendering, the main reference is the system call documentation, which is included from the landlock.h header file. Cc: Andrew G. 
Morgan Cc: John Johansen Cc: Paul Moore Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20251127115136.3064948-4-gnoack@google.com [mic: Update date] Signed-off-by: Mickaël Salaün --- Documentation/userspace-api/landlock.rst | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index 1d0c2c15c22e..90bb0778666d 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: March 2025 +:Date: November 2025 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -604,6 +604,14 @@ Landlock audit events with the ``LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF``, sys_landlock_restrict_self(). See Documentation/admin-guide/LSM/landlock.rst for more details on audit. +Thread synchronization (ABI < 8) +-------------------------------- + +Starting with the Landlock ABI version 8, it is now possible to +enforce Landlock rulesets across all threads of the calling process +using the ``LANDLOCK_RESTRICT_SELF_TSYNC`` flag passed to +sys_landlock_restrict_self(). + .. _kernel_support: Kernel support From bbb6f53e905ca119f99ccab8496f8921d9db9c50 Mon Sep 17 00:00:00 2001 From: Matthieu Buffet Date: Fri, 12 Dec 2025 17:36:57 +0100 Subject: [PATCH 04/12] landlock: Minor reword of docs for TCP access rights MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Move ABI requirement next to each access right to prepare adding more access rights; - Mention the possibility to remove the random component of a socket's ephemeral port choice within the netns-wide ephemeral port range, since it allows choosing the "random" ephemeral port. 
Signed-off-by: Matthieu Buffet Link: https://lore.kernel.org/r/20251212163704.142301-2-matthieu@buffet.re Signed-off-by: Mickaël Salaün --- include/uapi/linux/landlock.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/include/uapi/linux/landlock.h b/include/uapi/linux/landlock.h index d5081ab4e5ef..f88fa1f68b77 100644 --- a/include/uapi/linux/landlock.h +++ b/include/uapi/linux/landlock.h @@ -195,11 +195,13 @@ struct landlock_net_port_attr { * It should be noted that port 0 passed to :manpage:`bind(2)` will bind * to an available port from the ephemeral port range. This can be * configured with the ``/proc/sys/net/ipv4/ip_local_port_range`` sysctl - * (also used for IPv6). + * (also used for IPv6), and within that range, on a per-socket basis + * with ``setsockopt(IP_LOCAL_PORT_RANGE)``. * - * A Landlock rule with port 0 and the ``LANDLOCK_ACCESS_NET_BIND_TCP`` + * A Landlock rule with port 0 and the %LANDLOCK_ACCESS_NET_BIND_TCP * right means that requesting to bind on port 0 is allowed and it will - * automatically translate to binding on the related port range. + * automatically translate to binding on a kernel-assigned ephemeral + * port. */ __u64 port; }; @@ -342,13 +344,12 @@ struct landlock_net_port_attr { * These flags enable to restrict a sandboxed process to a set of network * actions. * - * This is supported since Landlock ABI version 4. - * * The following access rights apply to TCP port numbers: * - * - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind a TCP socket to a local port. - * - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect an active TCP socket to - * a remote port. + * - %LANDLOCK_ACCESS_NET_BIND_TCP: Bind TCP sockets to the given local + * port. Support added in Landlock ABI version 4. + * - %LANDLOCK_ACCESS_NET_CONNECT_TCP: Connect TCP sockets to the given + * remote port. Support added in Landlock ABI version 4. 
*/ /* clang-format off */ #define LANDLOCK_ACCESS_NET_BIND_TCP (1ULL << 0) From d90ba69e3335aba96c25a0ea7d46c5c115cd4756 Mon Sep 17 00:00:00 2001 From: Matthieu Buffet Date: Fri, 12 Dec 2025 17:36:58 +0100 Subject: [PATCH 05/12] landlock: Refactor TCP socket type check MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the socket type check earlier, so that we will later be able to add elseifs for other types. Ordering of checks (socket is of a type we enforce restrictions on) / (current creds have Landlock restrictions) should not change anything. Signed-off-by: Matthieu Buffet Link: https://lore.kernel.org/r/20251212163704.142301-3-matthieu@buffet.re Signed-off-by: Mickaël Salaün --- security/landlock/net.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/security/landlock/net.c b/security/landlock/net.c index e6367e30e5b0..59438285e73b 100644 --- a/security/landlock/net.c +++ b/security/landlock/net.c @@ -62,9 +62,6 @@ static int current_check_access_socket(struct socket *const sock, if (!subject) return 0; - if (!sk_is_tcp(sock->sk)) - return 0; - /* Checks for minimal header length to safely read sa_family. 
*/ if (addrlen < offsetofend(typeof(*address), sa_family)) return -EINVAL; @@ -214,16 +211,30 @@ static int current_check_access_socket(struct socket *const sock, static int hook_socket_bind(struct socket *const sock, struct sockaddr *const address, const int addrlen) { + access_mask_t access_request; + + if (sk_is_tcp(sock->sk)) + access_request = LANDLOCK_ACCESS_NET_BIND_TCP; + else + return 0; + return current_check_access_socket(sock, address, addrlen, - LANDLOCK_ACCESS_NET_BIND_TCP); + access_request); } static int hook_socket_connect(struct socket *const sock, struct sockaddr *const address, const int addrlen) { + access_mask_t access_request; + + if (sk_is_tcp(sock->sk)) + access_request = LANDLOCK_ACCESS_NET_CONNECT_TCP; + else + return 0; + return current_check_access_socket(sock, address, addrlen, - LANDLOCK_ACCESS_NET_CONNECT_TCP); + access_request); } static struct security_hook_list landlock_hooks[] __ro_after_init = { From 6100f2904e0ea1f2c832ab6e93573fae47d3b13e Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Tue, 27 Jan 2026 19:18:10 -0800 Subject: [PATCH 06/12] landlock: Add backwards compatibility for restrict flags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add backwards compatibility handling for the restrict flags introduced in ABI version 7. This is shown as a separate code block (similar to the ruleset_attr handling in the switch statement) because restrict flags are passed to landlock_restrict_self() rather than being part of the ruleset attributes. Also fix misleading description of the /usr rule which incorrectly stated it "only allow[s] reading" when the code actually allows both reading and executing (LANDLOCK_ACCESS_FS_EXECUTE is included in allowed_access). 
Signed-off-by: Samasth Norway Ananda Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260128031814.2945394-2-samasth.norway.ananda@oracle.com [mic: Rebased and fixed conflict] Signed-off-by: Mickaël Salaün --- Documentation/userspace-api/landlock.rst | 32 +++++++++++++++++------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index 90bb0778666d..2c1af0c930d4 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -8,7 +8,7 @@ Landlock: unprivileged access control ===================================== :Author: Mickaël Salaün -:Date: November 2025 +:Date: January 2026 The goal of Landlock is to enable restriction of ambient rights (e.g. global filesystem or network access) for a set of processes. Because Landlock @@ -142,11 +142,11 @@ This enables the creation of an inclusive ruleset that will contain our rules. } We can now add a new rule to this ruleset thanks to the returned file -descriptor referring to this ruleset. The rule will only allow reading the -file hierarchy ``/usr``. Without another rule, write actions would then be -denied by the ruleset. To add ``/usr`` to the ruleset, we open it with the -``O_PATH`` flag and fill the &struct landlock_path_beneath_attr with this file -descriptor. +descriptor referring to this ruleset. The rule will allow reading and +executing the file hierarchy ``/usr``. Without another rule, write actions +would then be denied by the ruleset. To add ``/usr`` to the ruleset, we open +it with the ``O_PATH`` flag and fill the &struct landlock_path_beneath_attr with +this file descriptor. .. code-block:: c @@ -191,10 +191,24 @@ number for a specific action: HTTPS connections. 
err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT, &net_port, 0); +When passing a non-zero ``flags`` argument to ``landlock_restrict_self()``, a +similar backwards compatibility check is needed for the restrict flags +(see sys_landlock_restrict_self() documentation for available flags): + +.. code-block:: c + + __u32 restrict_flags = LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON; + if (abi < 7) { + /* Clear logging flags unsupported before ABI 7. */ + restrict_flags &= ~(LANDLOCK_RESTRICT_SELF_LOG_SAME_EXEC_OFF | + LANDLOCK_RESTRICT_SELF_LOG_NEW_EXEC_ON | + LANDLOCK_RESTRICT_SELF_LOG_SUBDOMAINS_OFF); + } + The next step is to restrict the current thread from gaining more privileges (e.g. through a SUID binary). We now have a ruleset with the first rule -allowing read access to ``/usr`` while denying all other handled accesses for -the filesystem, and a second rule allowing HTTPS connections. +allowing read and execute access to ``/usr`` while denying all other handled +accesses for the filesystem, and a second rule allowing HTTPS connections. .. code-block:: c @@ -208,7 +222,7 @@ The current thread is now ready to sandbox itself with the ruleset. .. code-block:: c - if (landlock_restrict_self(ruleset_fd, 0)) { + if (landlock_restrict_self(ruleset_fd, restrict_flags)) { perror("Failed to enforce ruleset"); close(ruleset_fd); return 1; From fe72ce6710cba088b67e3279de87d7341fafc357 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Tue, 27 Jan 2026 19:18:11 -0800 Subject: [PATCH 07/12] landlock: Add errata documentation section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add errata section with code examples for querying errata and a warning that most applications should not check errata. Use kernel-doc directives to include errata descriptions from the header files instead of manual links. 
Also enhance existing DOC sections in security/landlock/errata/abi-*.h files with Impact sections, and update the code comment in syscalls.c to remind developers to update errata documentation when applicable. This addresses the gap where the kernel implements errata tracking but provides no user-facing documentation on how to use it, while improving the existing technical documentation in-place rather than duplicating it. Signed-off-by: Samasth Norway Ananda Reviewed-by: Günther Noack Link: https://lore.kernel.org/r/20260128031814.2945394-3-samasth.norway.ananda@oracle.com [mic: Cosmetic fix] Signed-off-by: Mickaël Salaün --- Documentation/userspace-api/landlock.rst | 65 ++++++++++++++++++++++-- security/landlock/errata/abi-1.h | 8 +++ security/landlock/errata/abi-4.h | 7 +++ security/landlock/errata/abi-6.h | 10 ++++ security/landlock/syscalls.c | 4 +- 5 files changed, 90 insertions(+), 4 deletions(-) diff --git a/Documentation/userspace-api/landlock.rst b/Documentation/userspace-api/landlock.rst index 2c1af0c930d4..13134bccdd39 100644 --- a/Documentation/userspace-api/landlock.rst +++ b/Documentation/userspace-api/landlock.rst @@ -445,9 +445,68 @@ system call: printf("Landlock supports LANDLOCK_ACCESS_FS_REFER.\n"); } -The following kernel interfaces are implicitly supported by the first ABI -version. Features only supported from a specific version are explicitly marked -as such. +All Landlock kernel interfaces are supported by the first ABI version unless +explicitly noted in their documentation. + +Landlock errata +--------------- + +In addition to ABI versions, Landlock provides an errata mechanism to track +fixes for issues that may affect backwards compatibility or require userspace +awareness. The errata bitmask can be queried using: + +.. 
code-block:: c + + int errata; + + errata = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_ERRATA); + if (errata < 0) { + /* Landlock not available or disabled */ + return 0; + } + +The returned value is a bitmask where each bit represents a specific erratum. +If bit N is set (``errata & (1 << (N - 1))``), then erratum N has been fixed +in the running kernel. + +.. warning:: + + **Most applications should NOT check errata.** In 99.9% of cases, checking + errata is unnecessary, increases code complexity, and can potentially + decrease protection if misused. For example, disabling the sandbox when an + erratum is not fixed could leave the system less secure than using + Landlock's best-effort protection. When in doubt, ignore errata. + +.. kernel-doc:: security/landlock/errata/abi-4.h + :doc: erratum_1 + +.. kernel-doc:: security/landlock/errata/abi-6.h + :doc: erratum_2 + +.. kernel-doc:: security/landlock/errata/abi-1.h + :doc: erratum_3 + +How to check for errata +~~~~~~~~~~~~~~~~~~~~~~~ + +If you determine that your application needs to check for specific errata, +use this pattern: + +.. code-block:: c + + int errata = landlock_create_ruleset(NULL, 0, LANDLOCK_CREATE_RULESET_ERRATA); + if (errata >= 0) { + /* Check for specific erratum (1-indexed) */ + if (errata & (1 << (erratum_number - 1))) { + /* Erratum N is fixed in this kernel */ + } else { + /* Erratum N is NOT fixed - consider implications for your use case */ + } + } + +**Important:** Only check errata if your application specifically relies on +behavior that changed due to the fix. The fixes generally make Landlock less +restrictive or more correct, not more restrictive. 
Kernel interface ================ diff --git a/security/landlock/errata/abi-1.h b/security/landlock/errata/abi-1.h index e8a2bff2e5b6..3f099555f059 100644 --- a/security/landlock/errata/abi-1.h +++ b/security/landlock/errata/abi-1.h @@ -12,5 +12,13 @@ * hierarchy down to its filesystem root and those from the related mount point * hierarchy. This prevents access right widening through rename or link * actions. + * + * Impact: + * + * Without this fix, it was possible to widen access rights through rename or + * link actions involving disconnected directories, potentially bypassing + * ``LANDLOCK_ACCESS_FS_REFER`` restrictions. This could allow privilege + * escalation in complex mount scenarios where directories become disconnected + * from their original mount points. */ LANDLOCK_ERRATUM(3) diff --git a/security/landlock/errata/abi-4.h b/security/landlock/errata/abi-4.h index c052ee54f89f..fe11ec7d7ddf 100644 --- a/security/landlock/errata/abi-4.h +++ b/security/landlock/errata/abi-4.h @@ -11,5 +11,12 @@ * :manpage:`bind(2)` and :manpage:`connect(2)` operations. This change ensures * that only TCP sockets are subject to TCP access rights, allowing other * protocols to operate without unnecessary restrictions. + * + * Impact: + * + * In kernels without this fix, using ``LANDLOCK_ACCESS_NET_BIND_TCP`` or + * ``LANDLOCK_ACCESS_NET_CONNECT_TCP`` would incorrectly restrict non-TCP + * stream protocols (SMC, MPTCP, SCTP), potentially breaking applications + * that rely on these protocols while using Landlock network restrictions. */ LANDLOCK_ERRATUM(1) diff --git a/security/landlock/errata/abi-6.h b/security/landlock/errata/abi-6.h index 5113a829f87e..5cb1475c7ea8 100644 --- a/security/landlock/errata/abi-6.h +++ b/security/landlock/errata/abi-6.h @@ -15,5 +15,15 @@ * interaction between threads of the same process should always be allowed. 
* This change ensures that any thread is allowed to send signals to any other * thread within the same process, regardless of their domain. + * + * Impact: + * + * This problem only manifests when the userspace process is itself using + * :manpage:`libpsx(3)` or an equivalent mechanism to enforce a Landlock policy + * on multiple already-running threads at once. Programs which enforce a + * Landlock policy at startup time and only then become multithreaded are not + * affected. Without this fix, signal scoping could break multi-threaded + * applications that expect threads within the same process to freely signal + * each other. */ LANDLOCK_ERRATUM(2) diff --git a/security/landlock/syscalls.c b/security/landlock/syscalls.c index 3e4e99deb7f9..0d66a68677b7 100644 --- a/security/landlock/syscalls.c +++ b/security/landlock/syscalls.c @@ -158,9 +158,11 @@ static const struct file_operations ruleset_fops = { /* * The Landlock ABI version should be incremented for each new Landlock-related * user space visible change (e.g. Landlock syscalls). This version should - * only be incremented once per Linux release, and the date in + * only be incremented once per Linux release. When incrementing, the date in * Documentation/userspace-api/landlock.rst should be updated to reflect the * UAPI change. + * If the change involves a fix that requires userspace awareness, also update + * the errata documentation in Documentation/userspace-api/landlock.rst . */ const int landlock_abi_version = 8; From de4b09abf088ba0a6a0bebb8b618fd29b9ce5c35 Mon Sep 17 00:00:00 2001 From: Samasth Norway Ananda Date: Tue, 27 Jan 2026 19:18:12 -0800 Subject: [PATCH 08/12] landlock: Document audit blocker field format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add comprehensive documentation for the ``blockers`` field format in AUDIT_LANDLOCK_ACCESS records, including all possible prefixes (fs., net., scope.) and their meanings. 
Also fix a typo and update the documentation date to reflect these changes. Signed-off-by: Samasth Norway Ananda Link: https://lore.kernel.org/r/20260128031814.2945394-4-samasth.norway.ananda@oracle.com Signed-off-by: Mickaël Salaün --- Documentation/admin-guide/LSM/landlock.rst | 35 ++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/Documentation/admin-guide/LSM/landlock.rst b/Documentation/admin-guide/LSM/landlock.rst index 9e61607def08..9923874e2156 100644 --- a/Documentation/admin-guide/LSM/landlock.rst +++ b/Documentation/admin-guide/LSM/landlock.rst @@ -6,7 +6,7 @@ Landlock: system-wide management ================================ :Author: Mickaël Salaün -:Date: March 2025 +:Date: January 2026 Landlock can leverage the audit framework to log events. @@ -38,6 +38,37 @@ AUDIT_LANDLOCK_ACCESS domain=195ba459b blockers=fs.refer path="/usr/bin" dev="vda2" ino=351 domain=195ba459b blockers=fs.make_reg,fs.refer path="/usr/local" dev="vda2" ino=365 + + The ``blockers`` field uses dot-separated prefixes to indicate the type of + restriction that caused the denial: + + **fs.*** - Filesystem access rights (ABI 1+): + - fs.execute, fs.write_file, fs.read_file, fs.read_dir + - fs.remove_dir, fs.remove_file + - fs.make_char, fs.make_dir, fs.make_reg, fs.make_sock + - fs.make_fifo, fs.make_block, fs.make_sym + - fs.refer (ABI 2+) + - fs.truncate (ABI 3+) + - fs.ioctl_dev (ABI 5+) + + **net.*** - Network access rights (ABI 4+): + - net.bind_tcp - TCP port binding was denied + - net.connect_tcp - TCP connection was denied + + **scope.*** - IPC scoping restrictions (ABI 6+): + - scope.abstract_unix_socket - Abstract UNIX socket connection denied + - scope.signal - Signal sending denied + + Multiple blockers can appear in a single event (comma-separated) when + multiple access rights are missing. For example, creating a regular file + in a directory that lacks both ``make_reg`` and ``refer`` rights would show + ``blockers=fs.make_reg,fs.refer``. 
+ + The object identification fields (path, dev, ino for filesystem; opid, + ocomm for signals) depend on the type of access being blocked and provide + context about what resource was involved in the denial. + + AUDIT_LANDLOCK_DOMAIN This record type describes the status of a Landlock domain. The ``status`` field can be either ``allocated`` or ``deallocated``. @@ -86,7 +117,7 @@ This command generates two events, each identified with a unique serial number following a timestamp (``msg=audit(1729738800.268:30)``). The first event (serial ``30``) contains 4 records. The first record (``type=LANDLOCK_ACCESS``) shows an access denied by the domain `1a6fdc66f`. -The cause of this denial is signal scopping restriction +The cause of this denial is signal scoping restriction (``blockers=scope.signal``). The process that would have receive this signal is the init process (``opid=1 ocomm="systemd"``). From 9adbe8935152c511c1e43a47d69f44f0e969afc8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 6 Feb 2026 16:11:53 +0100 Subject: [PATCH 09/12] selftests/landlock: Add filesystem access benchmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fs_bench benchmarks the performance of Landlock's path walk by exercising it in a scenario that amplifies Landlock's overhead: * Create a large number of nested directories * Enforce a Landlock policy in which a rule is associated with each of these subdirectories * Benchmark openat() applied to the deepest directory, forcing Landlock to walk the entire path. 
Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20260206151154.97915-3-gnoack3000@gmail.com [mic: Fix missing mode with O_CREAT, improve text consistency, sort includes] Signed-off-by: Mickaël Salaün --- tools/testing/selftests/landlock/.gitignore | 1 + tools/testing/selftests/landlock/Makefile | 1 + tools/testing/selftests/landlock/fs_bench.c | 214 ++++++++++++++++++++ 3 files changed, 216 insertions(+) create mode 100644 tools/testing/selftests/landlock/fs_bench.c diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore index a820329cae0d..1974e17a2611 100644 --- a/tools/testing/selftests/landlock/.gitignore +++ b/tools/testing/selftests/landlock/.gitignore @@ -1,4 +1,5 @@ /*_test +/fs_bench /sandbox-and-launch /true /wait-pipe diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile index 044b83bde16e..fc43225d319a 100644 --- a/tools/testing/selftests/landlock/Makefile +++ b/tools/testing/selftests/landlock/Makefile @@ -9,6 +9,7 @@ LOCAL_HDRS += $(wildcard *.h) src_test := $(wildcard *_test.c) TEST_GEN_PROGS := $(src_test:.c=) +TEST_GEN_PROGS += fs_bench TEST_GEN_PROGS_EXTENDED := \ true \ diff --git a/tools/testing/selftests/landlock/fs_bench.c b/tools/testing/selftests/landlock/fs_bench.c new file mode 100644 index 000000000000..d13a88dcd1ed --- /dev/null +++ b/tools/testing/selftests/landlock/fs_bench.c @@ -0,0 +1,214 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Landlock filesystem benchmark + * + * This program benchmarks the time required for file access checks. We use a + * large number (-d flag) of nested directories where each directory inode has + * an associated Landlock rule, and we repeatedly (-n flag) exercise a file + * access for which Landlock has to walk the path all the way up to the root. 
+ * + * With an increasing number of nested subdirectories, Landlock's portion of the + * overall system call time increases, which makes the effects of Landlock + * refactorings more measurable. + * + * This benchmark does *not* measure the building of the Landlock ruleset. The + * time required to add all these rules is not large enough to be easily + * measurable. A separate benchmark tool would be better to test that, and that + * tool could then also use a simpler file system layout. + * + * Copyright © 2026 Google LLC + */ + +#define _GNU_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "wrappers.h" + +static void usage(const char *const argv0) +{ + printf("Usage:\n"); + printf(" %s [OPTIONS]\n", argv0); + printf("\n"); + printf(" Benchmark expensive Landlock checks for D nested dirs\n"); + printf("\n"); + printf("Options:\n"); + printf(" -h help\n"); + printf(" -L disable Landlock (as a baseline)\n"); + printf(" -d D set directory depth to D\n"); + printf(" -n N set number of benchmark iterations to N\n"); +} + +/* + * Build a deep directory, enforce Landlock and return the FD to the + * deepest dir. On any failure, exit the process with an error. 
+ */ +static int build_directory(size_t depth, const bool use_landlock) +{ + const char *path = "d"; /* directory name */ + int abi, ruleset_fd, curr, prev; + + if (use_landlock) { + abi = landlock_create_ruleset(NULL, 0, + LANDLOCK_CREATE_RULESET_VERSION); + if (abi < 7) + err(1, "Landlock ABI too low: got %d, wanted 7+", abi); + } + + ruleset_fd = -1; + if (use_landlock) { + struct landlock_ruleset_attr attr = { + .handled_access_fs = LANDLOCK_ACCESS_FS_IOCTL_DEV | + LANDLOCK_ACCESS_FS_WRITE_FILE | + LANDLOCK_ACCESS_FS_MAKE_REG, + }; + ruleset_fd = landlock_create_ruleset(&attr, sizeof(attr), 0U); + if (ruleset_fd < 0) + err(1, "landlock_create_ruleset"); + } + + curr = open(".", O_PATH); + if (curr < 0) + err(1, "open(.)"); + + while (depth--) { + if (use_landlock) { + struct landlock_path_beneath_attr attr = { + .allowed_access = LANDLOCK_ACCESS_FS_IOCTL_DEV, + .parent_fd = curr, + }; + if (landlock_add_rule(ruleset_fd, + LANDLOCK_RULE_PATH_BENEATH, &attr, + 0) < 0) + err(1, "landlock_add_rule"); + } + + if (mkdirat(curr, path, 0700) < 0) + err(1, "mkdirat(%s)", path); + + prev = curr; + curr = openat(curr, path, O_PATH); + if (curr < 0) + err(1, "openat(%s)", path); + + close(prev); + } + + if (use_landlock) { + if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) < 0) + err(1, "prctl"); + + if (landlock_restrict_self(ruleset_fd, 0) < 0) + err(1, "landlock_restrict_self"); + } + + close(ruleset_fd); + return curr; +} + +static void remove_recursively(const size_t depth) +{ + const char *path = "d"; /* directory name */ + + int fd = openat(AT_FDCWD, ".", O_PATH); + + if (fd < 0) + err(1, "openat(.)"); + + for (size_t i = 0; i < depth - 1; i++) { + int oldfd = fd; + + fd = openat(fd, path, O_PATH); + if (fd < 0) + err(1, "openat(%s)", path); + close(oldfd); + } + + for (size_t i = 0; i < depth; i++) { + if (unlinkat(fd, path, AT_REMOVEDIR) < 0) + err(1, "unlinkat(%s)", path); + int newfd = openat(fd, "..", O_PATH); + + close(fd); + fd = newfd; + } + close(fd); +} + +int 
main(int argc, char *argv[]) +{ + bool use_landlock = true; + size_t num_iterations = 100000; + size_t num_subdirs = 10000; + int c, curr, fd; + struct tms start_time, end_time; + + setbuf(stdout, NULL); + while ((c = getopt(argc, argv, "hLd:n:")) != -1) { + switch (c) { + case 'h': + usage(argv[0]); + return EXIT_SUCCESS; + case 'L': + use_landlock = false; + break; + case 'd': + num_subdirs = atoi(optarg); + break; + case 'n': + num_iterations = atoi(optarg); + break; + default: + usage(argv[0]); + return EXIT_FAILURE; + } + } + + printf("*** Benchmark ***\n"); + printf("%zu dirs, %zu iterations, %s Landlock\n", num_subdirs, + num_iterations, use_landlock ? "with" : "without"); + + if (times(&start_time) == -1) + err(1, "times"); + + curr = build_directory(num_subdirs, use_landlock); + + for (int i = 0; i < num_iterations; i++) { + fd = openat(curr, "file.txt", O_CREAT | O_TRUNC | O_WRONLY, + 0600); + if (use_landlock) { + if (fd == 0) + errx(1, "openat succeeded, expected EACCES"); + if (errno != EACCES) + err(1, "openat expected EACCES, but got"); + } + if (fd != -1) + close(fd); + } + + if (times(&end_time) == -1) + err(1, "times"); + + printf("*** Benchmark concluded ***\n"); + printf("System: %ld clocks\n", + end_time.tms_stime - start_time.tms_stime); + printf("User : %ld clocks\n", + end_time.tms_utime - start_time.tms_utime); + printf("Clocks per second: %ld\n", CLOCKS_PER_SEC); + + close(curr); + + remove_recursively(num_subdirs); +} From 45f2a2926b2187d1b08132d2728af50785b007a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 6 Feb 2026 16:11:54 +0100 Subject: [PATCH 10/12] landlock: Add access_mask_subset() helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helper function checks whether an access_mask_t has a subset of the bits enabled than another one. This expresses the intent a bit smoother in the code and does not cost us anything when it gets inlined. 
Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20260206151154.97915-4-gnoack3000@gmail.com [mic: Improve subject] Signed-off-by: Mickaël Salaün --- security/landlock/access.h | 7 +++++++ security/landlock/fs.c | 4 ++-- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/security/landlock/access.h b/security/landlock/access.h index 7961c6630a2d..bab403470a6c 100644 --- a/security/landlock/access.h +++ b/security/landlock/access.h @@ -97,4 +97,11 @@ landlock_upgrade_handled_access_masks(struct access_masks access_masks) return access_masks; } +/* Checks the subset relation between access masks. */ +static inline bool access_mask_subset(access_mask_t subset, + access_mask_t superset) +{ + return (subset | superset) == superset; +} + #endif /* _SECURITY_LANDLOCK_ACCESS_H */ diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 8205673c8b1c..aa8e7cddb929 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -331,7 +331,7 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, /* Files only get access rights that make sense. */ if (!d_is_dir(path->dentry) && - (access_rights | ACCESS_FILE) != ACCESS_FILE) + !access_mask_subset(access_rights, ACCESS_FILE)) return -EINVAL; if (WARN_ON_ONCE(ruleset->num_layers != 1)) return -EINVAL; @@ -1704,7 +1704,7 @@ static int hook_file_open(struct file *const file) ARRAY_SIZE(layer_masks)); #endif /* CONFIG_AUDIT */ - if ((open_access_request & allowed_access) == open_access_request) + if (access_mask_subset(open_access_request, allowed_access)) return 0; /* Sets access to reflect the actual request. 
*/ From 65b691f84dba54a446518c51b25d3d4f1739dec5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?G=C3=BCnther=20Noack?= Date: Fri, 6 Feb 2026 16:11:55 +0100 Subject: [PATCH 11/12] landlock: Transpose the layer masks data structure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The layer masks data structure tracks the requested but unfulfilled access rights during an operation's security check. It stores one bit for each combination of access right and layer index. If the bit is set, that access right is not granted (yet) in the given layer and we have to traverse the path further upwards to grant it. Previously, the layer masks were stored as arrays mapping from access right indices to layer_mask_t. The layer_mask_t value then indicates all layers in which the given access right is still (tentatively) denied. This patch introduces struct layer_access_masks instead: This struct contains an array with the access_mask_t of each (tentatively) denied access right in that layer. The hypothesis of this patch is that this simplifies the code enough so that the resulting code will run faster: * We can use bitwise operations in multiple places where we previously looped over bits individually with macros. (Should require less branch speculation and lends itself to better loop unrolling.) * Code is ~75 lines smaller. Other noteworthy changes: * In no_more_access(), call a new helper function may_refer(), which only solves the asymmetric case. Previously, the code interleaved the checks for the two symmetric cases in RENAME_EXCHANGE. It feels that the code is clearer when renames without RENAME_EXCHANGE are more obviously the normal case. Tradeoffs: This change improves performance, at a slight size increase to the layer masks data structure. This fixes the size of the data structure at 32 bytes for all types of access rights. (64, once we introduce a 17th filesystem access right). 
For filesystem access rights, at the moment, the data structure has the same size as before, but once we introduce the 17th filesystem access right, it will double in size (from 32 to 64 bytes), as access_mask_t grows from 16 to 32 bit [1]. Link: https://lore.kernel.org/all/20260120.haeCh4li9Vae@digikod.net/ [1] Signed-off-by: Günther Noack Link: https://lore.kernel.org/r/20260206151154.97915-5-gnoack3000@gmail.com [mic: Cosmetic fixes, moved struct layer_access_masks definition] Signed-off-by: Mickaël Salaün --- security/landlock/access.h | 28 ++- security/landlock/audit.c | 81 +++------ security/landlock/audit.h | 3 +- security/landlock/domain.c | 44 ++--- security/landlock/domain.h | 3 +- security/landlock/fs.c | 348 ++++++++++++++++-------------------- security/landlock/net.c | 9 +- security/landlock/ruleset.c | 89 ++++----- security/landlock/ruleset.h | 6 +- 9 files changed, 271 insertions(+), 340 deletions(-) diff --git a/security/landlock/access.h b/security/landlock/access.h index bab403470a6c..42c95747d7bd 100644 --- a/security/landlock/access.h +++ b/security/landlock/access.h @@ -61,14 +61,30 @@ union access_masks_all { static_assert(sizeof(typeof_member(union access_masks_all, masks)) == sizeof(typeof_member(union access_masks_all, all))); -typedef u16 layer_mask_t; - -/* Makes sure all layers can be checked. */ -static_assert(BITS_PER_TYPE(layer_mask_t) >= LANDLOCK_MAX_NUM_LAYERS); +/** + * struct layer_access_masks - A boolean matrix of layers and access rights + * + * This has a bit for each combination of layer numbers and access rights. + * During access checks, it is used to represent the access rights for each + * layer which still need to be fulfilled. When all bits are 0, the access + * request is considered to be fulfilled. + */ +struct layer_access_masks { + /** + * @access: The unfulfilled access rights for each layer. + */ + access_mask_t access[LANDLOCK_MAX_NUM_LAYERS]; +}; /* - * Tracks domains responsible of a denied access. 
This is required to avoid - * storing in each object the full layer_masks[] required by update_request(). + * Tracks domains responsible of a denied access. This avoids storing in each + * object the full matrix of per-layer unfulfilled access rights, which is + * required by update_request(). + * + * Each nibble represents the layer index of the newest layer which denied a + * certain access right. For file system access rights, the upper four bits are + * the index of the layer which denies LANDLOCK_ACCESS_FS_IOCTL_DEV and the + * lower nibble represents LANDLOCK_ACCESS_FS_TRUNCATE. */ typedef u8 deny_masks_t; diff --git a/security/landlock/audit.c b/security/landlock/audit.c index e899995f1fd5..60ff217ab95b 100644 --- a/security/landlock/audit.c +++ b/security/landlock/audit.c @@ -180,38 +180,21 @@ static void test_get_hierarchy(struct kunit *const test) #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ +/* Get the youngest layer that denied the access_request. */ static size_t get_denied_layer(const struct landlock_ruleset *const domain, access_mask_t *const access_request, - const layer_mask_t (*const layer_masks)[], - const size_t layer_masks_size) + const struct layer_access_masks *masks) { - const unsigned long access_req = *access_request; - unsigned long access_bit; - access_mask_t missing = 0; - long youngest_layer = -1; - - for_each_set_bit(access_bit, &access_req, layer_masks_size) { - const layer_mask_t mask = (*layer_masks)[access_bit]; - long layer; - - if (!mask) - continue; - - /* __fls(1) == 0 */ - layer = __fls(mask); - if (layer > youngest_layer) { - youngest_layer = layer; - missing = BIT(access_bit); - } else if (layer == youngest_layer) { - missing |= BIT(access_bit); + for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) { + if (masks->access[i] & *access_request) { + *access_request &= masks->access[i]; + return i; } } - *access_request = missing; - if (youngest_layer == -1) - return domain->num_layers - 1; - - return 
youngest_layer; + /* Not found - fall back to default values */ + *access_request = 0; + return domain->num_layers - 1; } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST @@ -221,50 +204,39 @@ static void test_get_denied_layer(struct kunit *const test) const struct landlock_ruleset dom = { .num_layers = 5, }; - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT(1), - [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_DIR)] = BIT(1) | BIT(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_REMOVE_DIR)] = BIT(2), + const struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_READ_DIR, + .access[1] = LANDLOCK_ACCESS_FS_READ_FILE | + LANDLOCK_ACCESS_FS_READ_DIR, + .access[2] = LANDLOCK_ACCESS_FS_REMOVE_DIR, }; access_mask_t access; access = LANDLOCK_ACCESS_FS_EXECUTE; - KUNIT_EXPECT_EQ(test, 0, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 0, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_EXECUTE); access = LANDLOCK_ACCESS_FS_READ_FILE; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_FILE); access = LANDLOCK_ACCESS_FS_READ_DIR; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_DIR); access = LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_READ_DIR; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_READ_DIR); access = 
LANDLOCK_ACCESS_FS_EXECUTE | LANDLOCK_ACCESS_FS_READ_DIR; - KUNIT_EXPECT_EQ(test, 1, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 1, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, LANDLOCK_ACCESS_FS_READ_DIR); access = LANDLOCK_ACCESS_FS_WRITE_FILE; - KUNIT_EXPECT_EQ(test, 4, - get_denied_layer(&dom, &access, &layer_masks, - sizeof(layer_masks))); + KUNIT_EXPECT_EQ(test, 4, get_denied_layer(&dom, &access, &masks)); KUNIT_EXPECT_EQ(test, access, 0); } @@ -370,9 +342,6 @@ static bool is_valid_request(const struct landlock_request *const request) return false; } - if (WARN_ON_ONCE(!!request->layer_masks ^ !!request->layer_masks_size)) - return false; - if (request->deny_masks) { if (WARN_ON_ONCE(!request->all_existing_optional_access)) return false; @@ -406,12 +375,12 @@ void landlock_log_denial(const struct landlock_cred_security *const subject, if (missing) { /* Gets the nearest domain that denies the request. */ if (request->layer_masks) { - youngest_layer = get_denied_layer( - subject->domain, &missing, request->layer_masks, - request->layer_masks_size); + youngest_layer = get_denied_layer(subject->domain, + &missing, + request->layer_masks); } else { youngest_layer = get_layer_from_deny_masks( - &missing, request->all_existing_optional_access, + &missing, _LANDLOCK_ACCESS_FS_OPTIONAL, request->deny_masks); } youngest_denied = diff --git a/security/landlock/audit.h b/security/landlock/audit.h index 92428b7fc4d8..56778331b58c 100644 --- a/security/landlock/audit.h +++ b/security/landlock/audit.h @@ -43,8 +43,7 @@ struct landlock_request { access_mask_t access; /* Required fields for requests with layer masks. */ - const layer_mask_t (*layer_masks)[]; - size_t layer_masks_size; + const struct layer_access_masks *layer_masks; /* Required fields for requests with deny masks. 
*/ const access_mask_t all_existing_optional_access; diff --git a/security/landlock/domain.c b/security/landlock/domain.c index a647b68e8d06..79cb3bbdf4c5 100644 --- a/security/landlock/domain.c +++ b/security/landlock/domain.c @@ -182,32 +182,36 @@ static void test_get_layer_deny_mask(struct kunit *const test) deny_masks_t landlock_get_deny_masks(const access_mask_t all_existing_optional_access, const access_mask_t optional_access, - const layer_mask_t (*const layer_masks)[], - const size_t layer_masks_size) + const struct layer_access_masks *const masks) { const unsigned long access_opt = optional_access; unsigned long access_bit; deny_masks_t deny_masks = 0; + access_mask_t all_denied = 0; /* This may require change with new object types. */ - WARN_ON_ONCE(access_opt != - (optional_access & all_existing_optional_access)); + WARN_ON_ONCE(!access_mask_subset(optional_access, + all_existing_optional_access)); - if (WARN_ON_ONCE(!layer_masks)) + if (WARN_ON_ONCE(!masks)) return 0; if (WARN_ON_ONCE(!access_opt)) return 0; - for_each_set_bit(access_bit, &access_opt, layer_masks_size) { - const layer_mask_t mask = (*layer_masks)[access_bit]; + for (ssize_t i = ARRAY_SIZE(masks->access) - 1; i >= 0; i--) { + const access_mask_t denied = masks->access[i] & optional_access; + const unsigned long newly_denied = denied & ~all_denied; - if (!mask) + if (!newly_denied) continue; - /* __fls(1) == 0 */ - deny_masks |= get_layer_deny_mask(all_existing_optional_access, - access_bit, __fls(mask)); + for_each_set_bit(access_bit, &newly_denied, + 8 * sizeof(access_mask_t)) { + deny_masks |= get_layer_deny_mask( + all_existing_optional_access, access_bit, i); + } + all_denied |= denied; } return deny_masks; } @@ -216,28 +220,28 @@ landlock_get_deny_masks(const access_mask_t all_existing_optional_access, static void test_landlock_get_deny_masks(struct kunit *const test) { - const layer_mask_t layers1[BITS_PER_TYPE(access_mask_t)] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = 
BIT_ULL(0) | - BIT_ULL(9), - [BIT_INDEX(LANDLOCK_ACCESS_FS_TRUNCATE)] = BIT_ULL(1), - [BIT_INDEX(LANDLOCK_ACCESS_FS_IOCTL_DEV)] = BIT_ULL(2) | - BIT_ULL(0), + const struct layer_access_masks layers1 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_IOCTL_DEV, + .access[1] = LANDLOCK_ACCESS_FS_TRUNCATE, + .access[2] = LANDLOCK_ACCESS_FS_IOCTL_DEV, + .access[9] = LANDLOCK_ACCESS_FS_EXECUTE, }; KUNIT_EXPECT_EQ(test, 0x1, landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL, LANDLOCK_ACCESS_FS_TRUNCATE, - &layers1, ARRAY_SIZE(layers1))); + &layers1)); KUNIT_EXPECT_EQ(test, 0x20, landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL, LANDLOCK_ACCESS_FS_IOCTL_DEV, - &layers1, ARRAY_SIZE(layers1))); + &layers1)); KUNIT_EXPECT_EQ( test, 0x21, landlock_get_deny_masks(_LANDLOCK_ACCESS_FS_OPTIONAL, LANDLOCK_ACCESS_FS_TRUNCATE | LANDLOCK_ACCESS_FS_IOCTL_DEV, - &layers1, ARRAY_SIZE(layers1))); + &layers1)); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ diff --git a/security/landlock/domain.h b/security/landlock/domain.h index 621f054c9a2b..a9d57db0120d 100644 --- a/security/landlock/domain.h +++ b/security/landlock/domain.h @@ -122,8 +122,7 @@ struct landlock_hierarchy { deny_masks_t landlock_get_deny_masks(const access_mask_t all_existing_optional_access, const access_mask_t optional_access, - const layer_mask_t (*const layer_masks)[], - size_t layer_masks_size); + const struct layer_access_masks *const masks); int landlock_init_hierarchy_log(struct landlock_hierarchy *const hierarchy); diff --git a/security/landlock/fs.c b/security/landlock/fs.c index aa8e7cddb929..e764470f588c 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -398,57 +398,55 @@ static const struct access_masks any_fs = { .fs = ~0, }; +/* + * Returns true iff the child file with the given src_child access rights under + * src_parent would result in having the same or fewer access rights if it were + * moved under new_parent. 
 + */ +static bool may_refer(const struct layer_access_masks *const src_parent, + const struct layer_access_masks *const src_child, + const struct layer_access_masks *const new_parent, + const bool child_is_dir) +{ + for (size_t i = 0; i < ARRAY_SIZE(new_parent->access); i++) { + access_mask_t child_access = src_parent->access[i] & + src_child->access[i]; + access_mask_t parent_access = new_parent->access[i]; + + if (!child_is_dir) { + child_access &= ACCESS_FILE; + parent_access &= ACCESS_FILE; + } + + if (!access_mask_subset(child_access, parent_access)) + return false; + } + return true; +} + /* * Check that a destination file hierarchy has more restrictions than a source * file hierarchy. This is only used for link and rename actions. * - * @layer_masks_child2: Optional child masks. + * Returns: true if child1 may be moved from parent1 to parent2 without + * increasing its access rights. If child2 is set, an additional condition is + * that child2 may be moved from parent2 to parent1 without increasing its access + * rights. */ -static bool no_more_access( - const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], - const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], - const bool child1_is_directory, - const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], - const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], - const bool child2_is_directory) +static bool no_more_access(const struct layer_access_masks *const parent1, + const struct layer_access_masks *const child1, + const bool child1_is_dir, + const struct layer_access_masks *const parent2, + const struct layer_access_masks *const child2, + const bool child2_is_dir) { - unsigned long access_bit; + if (!may_refer(parent1, child1, parent2, child1_is_dir)) + return false; - for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); - access_bit++) { - /* Ignores accesses that only make sense for directories.
*/ - const bool is_file_access = - !!(BIT_ULL(access_bit) & ACCESS_FILE); + if (!child2) + return true; - if (child1_is_directory || is_file_access) { - /* - * Checks if the destination restrictions are a - * superset of the source ones (i.e. inherited access - * rights without child exceptions): - * restrictions(parent2) >= restrictions(child1) - */ - if ((((*layer_masks_parent1)[access_bit] & - (*layer_masks_child1)[access_bit]) | - (*layer_masks_parent2)[access_bit]) != - (*layer_masks_parent2)[access_bit]) - return false; - } - - if (!layer_masks_child2) - continue; - if (child2_is_directory || is_file_access) { - /* - * Checks inverted restrictions for RENAME_EXCHANGE: - * restrictions(parent1) >= restrictions(child2) - */ - if ((((*layer_masks_parent2)[access_bit] & - (*layer_masks_child2)[access_bit]) | - (*layer_masks_parent1)[access_bit]) != - (*layer_masks_parent1)[access_bit]) - return false; - } - } - return true; + return may_refer(parent2, child2, parent1, child2_is_dir); } #define NMA_TRUE(...) 
KUNIT_EXPECT_TRUE(test, no_more_access(__VA_ARGS__)) @@ -458,25 +456,25 @@ static bool no_more_access( static void test_no_more_access(struct kunit *const test) { - const layer_mask_t rx0[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_READ_FILE)] = BIT_ULL(0), + const struct layer_access_masks rx0 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_READ_FILE, }; - const layer_mask_t mx0[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_MAKE_REG)] = BIT_ULL(0), + const struct layer_access_masks mx0 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE | + LANDLOCK_ACCESS_FS_MAKE_REG, }; - const layer_mask_t x0[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), + const struct layer_access_masks x0 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, }; - const layer_mask_t x1[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(1), + const struct layer_access_masks x1 = { + .access[1] = LANDLOCK_ACCESS_FS_EXECUTE, }; - const layer_mask_t x01[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0) | - BIT_ULL(1), + const struct layer_access_masks x01 = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, + .access[1] = LANDLOCK_ACCESS_FS_EXECUTE, }; - const layer_mask_t allows_all[LANDLOCK_NUM_ACCESS_FS] = {}; + const struct layer_access_masks allows_all = {}; /* Checks without restriction. 
*/ NMA_TRUE(&x0, &allows_all, false, &allows_all, NULL, false); @@ -564,31 +562,30 @@ static void test_no_more_access(struct kunit *const test) #undef NMA_TRUE #undef NMA_FALSE -static bool is_layer_masks_allowed( - layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) +static bool is_layer_masks_allowed(const struct layer_access_masks *masks) { - return !memchr_inv(layer_masks, 0, sizeof(*layer_masks)); + return !memchr_inv(&masks->access, 0, sizeof(masks->access)); } /* - * Removes @layer_masks accesses that are not requested. + * Removes @masks accesses that are not requested. * * Returns true if the request is allowed, false otherwise. */ -static bool -scope_to_request(const access_mask_t access_request, - layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) +static bool scope_to_request(const access_mask_t access_request, + struct layer_access_masks *masks) { - const unsigned long access_req = access_request; - unsigned long access_bit; + bool saw_unfulfilled_access = false; - if (WARN_ON_ONCE(!layer_masks)) + if (WARN_ON_ONCE(!masks)) return true; - for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) - (*layer_masks)[access_bit] = 0; - - return is_layer_masks_allowed(layer_masks); + for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { + masks->access[i] &= access_request; + if (masks->access[i]) + saw_unfulfilled_access = true; + } + return !saw_unfulfilled_access; } #ifdef CONFIG_SECURITY_LANDLOCK_KUNIT_TEST @@ -596,48 +593,41 @@ scope_to_request(const access_mask_t access_request, static void test_scope_to_request_with_exec_none(struct kunit *const test) { /* Allows everything. */ - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks masks = {}; /* Checks and scopes with execute. 
*/ - KUNIT_EXPECT_TRUE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, - &layer_masks)); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); + KUNIT_EXPECT_TRUE(test, + scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, &masks)); + KUNIT_EXPECT_EQ(test, 0, masks.access[0]); } static void test_scope_to_request_with_exec_some(struct kunit *const test) { /* Denies execute and write. */ - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), + struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, + .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE, }; /* Checks and scopes with execute. */ KUNIT_EXPECT_FALSE(test, scope_to_request(LANDLOCK_ACCESS_FS_EXECUTE, - &layer_masks)); - KUNIT_EXPECT_EQ(test, BIT_ULL(0), - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); + &masks)); + KUNIT_EXPECT_EQ(test, LANDLOCK_ACCESS_FS_EXECUTE, masks.access[0]); + KUNIT_EXPECT_EQ(test, 0, masks.access[1]); } static void test_scope_to_request_without_access(struct kunit *const test) { /* Denies execute and write. */ - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)] = BIT_ULL(0), - [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(1), + struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_EXECUTE, + .access[1] = LANDLOCK_ACCESS_FS_WRITE_FILE, }; /* Checks and scopes without access request. 
*/ - KUNIT_EXPECT_TRUE(test, scope_to_request(0, &layer_masks)); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_EXECUTE)]); - KUNIT_EXPECT_EQ(test, 0, - layer_masks[BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)]); + KUNIT_EXPECT_TRUE(test, scope_to_request(0, &masks)); + KUNIT_EXPECT_EQ(test, 0, masks.access[0]); + KUNIT_EXPECT_EQ(test, 0, masks.access[1]); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ @@ -646,20 +636,16 @@ static void test_scope_to_request_without_access(struct kunit *const test) * Returns true if there is at least one access right different than * LANDLOCK_ACCESS_FS_REFER. */ -static bool -is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], - const access_mask_t access_request) +static bool is_eacces(const struct layer_access_masks *masks, + const access_mask_t access_request) { - unsigned long access_bit; - /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ - const unsigned long access_check = access_request & - ~LANDLOCK_ACCESS_FS_REFER; - - if (!layer_masks) + if (!masks) return false; - for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { - if ((*layer_masks)[access_bit]) + for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { + /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. 
*/ + if (masks->access[i] & access_request & + ~LANDLOCK_ACCESS_FS_REFER) return true; } return false; @@ -672,37 +658,37 @@ is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], static void test_is_eacces_with_none(struct kunit *const test) { - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + const struct layer_access_masks masks = {}; - IE_FALSE(&layer_masks, 0); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); + IE_FALSE(&masks, 0); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_refer(struct kunit *const test) { - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_REFER)] = BIT_ULL(0), + const struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_REFER, }; - IE_FALSE(&layer_masks, 0); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); + IE_FALSE(&masks, 0); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_WRITE_FILE); } static void test_is_eacces_with_write(struct kunit *const test) { - const layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = { - [BIT_INDEX(LANDLOCK_ACCESS_FS_WRITE_FILE)] = BIT_ULL(0), + const struct layer_access_masks masks = { + .access[0] = LANDLOCK_ACCESS_FS_WRITE_FILE, }; - IE_FALSE(&layer_masks, 0); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_REFER); - IE_FALSE(&layer_masks, LANDLOCK_ACCESS_FS_EXECUTE); + IE_FALSE(&masks, 0); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_REFER); + IE_FALSE(&masks, LANDLOCK_ACCESS_FS_EXECUTE); - IE_TRUE(&layer_masks, LANDLOCK_ACCESS_FS_WRITE_FILE); + IE_TRUE(&masks, 
LANDLOCK_ACCESS_FS_WRITE_FILE); } #endif /* CONFIG_SECURITY_LANDLOCK_KUNIT_TEST */ @@ -752,26 +738,25 @@ static void test_is_eacces_with_write(struct kunit *const test) * - true if the access request is granted; * - false otherwise. */ -static bool is_access_to_paths_allowed( - const struct landlock_ruleset *const domain, - const struct path *const path, - const access_mask_t access_request_parent1, - layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], - struct landlock_request *const log_request_parent1, - struct dentry *const dentry_child1, - const access_mask_t access_request_parent2, - layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], - struct landlock_request *const log_request_parent2, - struct dentry *const dentry_child2) +static bool +is_access_to_paths_allowed(const struct landlock_ruleset *const domain, + const struct path *const path, + const access_mask_t access_request_parent1, + struct layer_access_masks *layer_masks_parent1, + struct landlock_request *const log_request_parent1, + struct dentry *const dentry_child1, + const access_mask_t access_request_parent2, + struct layer_access_masks *layer_masks_parent2, + struct landlock_request *const log_request_parent2, + struct dentry *const dentry_child2) { bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, child1_is_directory = true, child2_is_directory = true; struct path walker_path; access_mask_t access_masked_parent1, access_masked_parent2; - layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], - _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; - layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, - (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; + struct layer_access_masks _layer_masks_child1, _layer_masks_child2; + struct layer_access_masks *layer_masks_child1 = NULL, + *layer_masks_child2 = NULL; if (!access_request_parent1 && !access_request_parent2) return true; @@ -811,22 +796,20 @@ static bool is_access_to_paths_allowed( } 
if (unlikely(dentry_child1)) { - landlock_unmask_layers( - find_rule(domain, dentry_child1), - landlock_init_layer_masks( - domain, LANDLOCK_MASK_ACCESS_FS, - &_layer_masks_child1, LANDLOCK_KEY_INODE), - &_layer_masks_child1, ARRAY_SIZE(_layer_masks_child1)); + if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + &_layer_masks_child1, + LANDLOCK_KEY_INODE)) + landlock_unmask_layers(find_rule(domain, dentry_child1), + &_layer_masks_child1); layer_masks_child1 = &_layer_masks_child1; child1_is_directory = d_is_dir(dentry_child1); } if (unlikely(dentry_child2)) { - landlock_unmask_layers( - find_rule(domain, dentry_child2), - landlock_init_layer_masks( - domain, LANDLOCK_MASK_ACCESS_FS, - &_layer_masks_child2, LANDLOCK_KEY_INODE), - &_layer_masks_child2, ARRAY_SIZE(_layer_masks_child2)); + if (landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + &_layer_masks_child2, + LANDLOCK_KEY_INODE)) + landlock_unmask_layers(find_rule(domain, dentry_child2), + &_layer_masks_child2); layer_masks_child2 = &_layer_masks_child2; child2_is_directory = d_is_dir(dentry_child2); } @@ -881,16 +864,12 @@ static bool is_access_to_paths_allowed( } rule = find_rule(domain, walker_path.dentry); - allowed_parent1 = allowed_parent1 || - landlock_unmask_layers( - rule, access_masked_parent1, - layer_masks_parent1, - ARRAY_SIZE(*layer_masks_parent1)); - allowed_parent2 = allowed_parent2 || - landlock_unmask_layers( - rule, access_masked_parent2, - layer_masks_parent2, - ARRAY_SIZE(*layer_masks_parent2)); + allowed_parent1 = + allowed_parent1 || + landlock_unmask_layers(rule, layer_masks_parent1); + allowed_parent2 = + allowed_parent2 || + landlock_unmask_layers(rule, layer_masks_parent2); /* Stops when a rule from each layer grants access. 
*/ if (allowed_parent1 && allowed_parent2) @@ -950,8 +929,6 @@ jump_up: log_request_parent1->audit.u.path = *path; log_request_parent1->access = access_masked_parent1; log_request_parent1->layer_masks = layer_masks_parent1; - log_request_parent1->layer_masks_size = - ARRAY_SIZE(*layer_masks_parent1); } if (!allowed_parent2 && log_request_parent2) { @@ -960,8 +937,6 @@ jump_up: log_request_parent2->audit.u.path = *path; log_request_parent2->access = access_masked_parent2; log_request_parent2->layer_masks = layer_masks_parent2; - log_request_parent2->layer_masks_size = - ARRAY_SIZE(*layer_masks_parent2); } #endif /* CONFIG_AUDIT */ @@ -976,7 +951,7 @@ static int current_check_access_path(const struct path *const path, }; const struct landlock_cred_security *const subject = landlock_get_applicable_subject(current_cred(), masks, NULL); - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks layer_masks; struct landlock_request request = {}; if (!subject) @@ -1051,12 +1026,11 @@ static access_mask_t maybe_remove(const struct dentry *const dentry) * - true if all the domain access rights are allowed for @dir; * - false if the walk reached @mnt_root. 
*/ -static bool collect_domain_accesses( - const struct landlock_ruleset *const domain, - const struct dentry *const mnt_root, struct dentry *dir, - layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) +static bool collect_domain_accesses(const struct landlock_ruleset *const domain, + const struct dentry *const mnt_root, + struct dentry *dir, + struct layer_access_masks *layer_masks_dom) { - unsigned long access_dom; bool ret = false; if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) @@ -1064,18 +1038,17 @@ static bool collect_domain_accesses( if (is_nouser_or_private(dir)) return true; - access_dom = landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, - layer_masks_dom, - LANDLOCK_KEY_INODE); + if (!landlock_init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + layer_masks_dom, LANDLOCK_KEY_INODE)) + return true; dget(dir); while (true) { struct dentry *parent_dentry; /* Gets all layers allowing all domain accesses. */ - if (landlock_unmask_layers(find_rule(domain, dir), access_dom, - layer_masks_dom, - ARRAY_SIZE(*layer_masks_dom))) { + if (landlock_unmask_layers(find_rule(domain, dir), + layer_masks_dom)) { /* * Stops when all handled accesses are allowed by at * least one rule in each layer. 
@@ -1163,8 +1136,8 @@ static int current_check_refer_path(struct dentry *const old_dentry, access_mask_t access_request_parent1, access_request_parent2; struct path mnt_dir; struct dentry *old_parent; - layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS] = {}, - layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks layer_masks_parent1 = {}, + layer_masks_parent2 = {}; struct landlock_request request1 = {}, request2 = {}; if (!subject) @@ -1640,7 +1613,7 @@ static bool is_device(const struct file *const file) static int hook_file_open(struct file *const file) { - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + struct layer_access_masks layer_masks = {}; access_mask_t open_access_request, full_access_request, allowed_access, optional_access; const struct landlock_cred_security *const subject = @@ -1675,20 +1648,14 @@ static int hook_file_open(struct file *const file) &layer_masks, &request, NULL, 0, NULL, NULL, NULL)) { allowed_access = full_access_request; } else { - unsigned long access_bit; - const unsigned long access_req = full_access_request; - /* * Calculate the actual allowed access rights from layer_masks. - * Add each access right to allowed_access which has not been - * vetoed by any layer. + * Remove the access rights from the full access request which + * are still unfulfilled in any of the layers. 
*/ - allowed_access = 0; - for_each_set_bit(access_bit, &access_req, - ARRAY_SIZE(layer_masks)) { - if (!layer_masks[access_bit]) - allowed_access |= BIT_ULL(access_bit); - } + allowed_access = full_access_request; + for (size_t i = 0; i < ARRAY_SIZE(layer_masks.access); i++) + allowed_access &= ~layer_masks.access[i]; } /* @@ -1700,8 +1667,7 @@ static int hook_file_open(struct file *const file) landlock_file(file)->allowed_access = allowed_access; #ifdef CONFIG_AUDIT landlock_file(file)->deny_masks = landlock_get_deny_masks( - _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks, - ARRAY_SIZE(layer_masks)); + _LANDLOCK_ACCESS_FS_OPTIONAL, optional_access, &layer_masks); #endif /* CONFIG_AUDIT */ if (access_mask_subset(open_access_request, allowed_access)) diff --git a/security/landlock/net.c b/security/landlock/net.c index 59438285e73b..c368649985c5 100644 --- a/security/landlock/net.c +++ b/security/landlock/net.c @@ -47,7 +47,7 @@ static int current_check_access_socket(struct socket *const sock, access_mask_t access_request) { __be16 port; - layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_NET] = {}; + struct layer_access_masks layer_masks = {}; const struct landlock_rule *rule; struct landlock_id id = { .type = LANDLOCK_KEY_NET_PORT, @@ -191,8 +191,10 @@ static int current_check_access_socket(struct socket *const sock, access_request = landlock_init_layer_masks(subject->domain, access_request, &layer_masks, LANDLOCK_KEY_NET_PORT); - if (landlock_unmask_layers(rule, access_request, &layer_masks, - ARRAY_SIZE(layer_masks))) + if (!access_request) + return 0; + + if (landlock_unmask_layers(rule, &layer_masks)) return 0; audit_net.family = address->sa_family; @@ -203,7 +205,6 @@ static int current_check_access_socket(struct socket *const sock, .audit.u.net = &audit_net, .access = access_request, .layer_masks = &layer_masks, - .layer_masks_size = ARRAY_SIZE(layer_masks), }); return -EACCES; } diff --git a/security/landlock/ruleset.c b/security/landlock/ruleset.c 
index 0a5b0c76b3f7..419b237de635 100644 --- a/security/landlock/ruleset.c +++ b/security/landlock/ruleset.c @@ -612,22 +612,24 @@ landlock_find_rule(const struct landlock_ruleset *const ruleset, return NULL; } -/* - * @layer_masks is read and may be updated according to the access request and - * the matching rule. - * @masks_array_size must be equal to ARRAY_SIZE(*layer_masks). +/** + * landlock_unmask_layers - Remove the access rights in @masks + * which are granted in @rule * - * Returns true if the request is allowed (i.e. relevant layer masks for the - * request are empty). + * Updates the set of (per-layer) unfulfilled access rights @masks + * so that all the access rights granted in @rule are removed from it + * (because they are now fulfilled). + * + * @rule: A rule that grants a set of access rights for each layer + * @masks: A matrix of unfulfilled access rights for each layer + * + * Returns true if the request is allowed (i.e. @rule grants all remaining + * unfulfilled access rights and @masks has no leftover set bits). */ bool landlock_unmask_layers(const struct landlock_rule *const rule, - const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], - const size_t masks_array_size) + struct layer_access_masks *masks) { - size_t layer_level; - - if (!access_request || !layer_masks) + if (!masks) return true; if (!rule) return false; @@ -642,28 +644,18 @@ bool landlock_unmask_layers(const struct landlock_rule *const rule, * by only one rule, but by the union (binary OR) of multiple rules. * E.g.
/a/b + /a => /a/b */ - for (layer_level = 0; layer_level < rule->num_layers; layer_level++) { - const struct landlock_layer *const layer = - &rule->layers[layer_level]; - const layer_mask_t layer_bit = BIT_ULL(layer->level - 1); - const unsigned long access_req = access_request; - unsigned long access_bit; - bool is_empty; + for (size_t i = 0; i < rule->num_layers; i++) { + const struct landlock_layer *const layer = &rule->layers[i]; - /* - * Records in @layer_masks which layer grants access to each requested - * access: bit cleared if the related layer grants access. - */ - is_empty = true; - for_each_set_bit(access_bit, &access_req, masks_array_size) { - if (layer->access & BIT_ULL(access_bit)) - (*layer_masks)[access_bit] &= ~layer_bit; - is_empty = is_empty && !(*layer_masks)[access_bit]; - } - if (is_empty) - return true; + /* Clear the bits where the layer in the rule grants access. */ + masks->access[layer->level - 1] &= ~layer->access; } - return false; + + for (size_t i = 0; i < ARRAY_SIZE(masks->access); i++) { + if (masks->access[i]) + return false; + } + return true; } typedef access_mask_t @@ -673,13 +665,12 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset, /** * landlock_init_layer_masks - Initialize layer masks from an access request * - * Populates @layer_masks such that for each access right in @access_request, + * Populates @masks such that for each access right in @access_request, * the bits for all the layers are set where this access right is handled. * * @domain: The domain that defines the current restrictions. * @access_request: The requested access rights to check. - * @layer_masks: It must contain %LANDLOCK_NUM_ACCESS_FS or - * %LANDLOCK_NUM_ACCESS_NET elements according to @key_type. + * @masks: Layer access masks to populate. * @key_type: The key type to switch between access masks of different types. 
* * Returns: An access mask where each access right bit is set which is handled @@ -688,23 +679,20 @@ get_access_mask_t(const struct landlock_ruleset *const ruleset, access_mask_t landlock_init_layer_masks(const struct landlock_ruleset *const domain, const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], + struct layer_access_masks *const masks, const enum landlock_key_type key_type) { access_mask_t handled_accesses = 0; - size_t layer_level, num_access; get_access_mask_t *get_access_mask; switch (key_type) { case LANDLOCK_KEY_INODE: get_access_mask = landlock_get_fs_access_mask; - num_access = LANDLOCK_NUM_ACCESS_FS; break; #if IS_ENABLED(CONFIG_INET) case LANDLOCK_KEY_NET_PORT: get_access_mask = landlock_get_net_access_mask; - num_access = LANDLOCK_NUM_ACCESS_NET; break; #endif /* IS_ENABLED(CONFIG_INET) */ @@ -713,27 +701,18 @@ landlock_init_layer_masks(const struct landlock_ruleset *const domain, return 0; } - memset(layer_masks, 0, - array_size(sizeof((*layer_masks)[0]), num_access)); - /* An empty access request can happen because of O_WRONLY | O_RDWR. */ if (!access_request) return 0; - /* Saves all handled accesses per layer. 
*/ - for (layer_level = 0; layer_level < domain->num_layers; layer_level++) { - const unsigned long access_req = access_request; - const access_mask_t access_mask = - get_access_mask(domain, layer_level); - unsigned long access_bit; + for (size_t i = 0; i < domain->num_layers; i++) { + const access_mask_t handled = get_access_mask(domain, i); - for_each_set_bit(access_bit, &access_req, num_access) { - if (BIT_ULL(access_bit) & access_mask) { - (*layer_masks)[access_bit] |= - BIT_ULL(layer_level); - handled_accesses |= BIT_ULL(access_bit); - } - } + masks->access[i] = access_request & handled; + handled_accesses |= masks->access[i]; } + for (size_t i = domain->num_layers; i < ARRAY_SIZE(masks->access); i++) + masks->access[i] = 0; + return handled_accesses; } diff --git a/security/landlock/ruleset.h b/security/landlock/ruleset.h index 1a78cba662b2..9d6dc632684c 100644 --- a/security/landlock/ruleset.h +++ b/security/landlock/ruleset.h @@ -302,14 +302,12 @@ landlock_get_scope_mask(const struct landlock_ruleset *const ruleset, } bool landlock_unmask_layers(const struct landlock_rule *const rule, - const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], - const size_t masks_array_size); + struct layer_access_masks *masks); access_mask_t landlock_init_layer_masks(const struct landlock_ruleset *const domain, const access_mask_t access_request, - layer_mask_t (*const layer_masks)[], + struct layer_access_masks *masks, const enum landlock_key_type key_type); #endif /* _SECURITY_LANDLOCK_RULESET_H */ From e265b330b93e3a3f9ff5256451d4f09b5f89b239 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micka=C3=ABl=20Sala=C3=BCn?= Date: Sat, 7 Feb 2026 12:11:35 +0100 Subject: [PATCH 12/12] =?UTF-8?q?mailmap:=20Add=20entry=20for=20Micka?= =?UTF-8?q?=C3=ABl=20Sala=C3=BCn?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit My Microsoft address is no longer used. Add a mailmap entry to reflect that. 
Cc: Günther Noack Cc: James Morris Reviewed-by: Paul Moore Link: https://lore.kernel.org/r/20260207111136.577249-1-mic@digikod.net Signed-off-by: Mickaël Salaün --- .mailmap | 1 + 1 file changed, 1 insertion(+) diff --git a/.mailmap b/.mailmap index 4a8a160f28ed..2917965292d3 100644 --- a/.mailmap +++ b/.mailmap @@ -557,6 +557,7 @@ Michel Dänzer Michel Lespinasse Michel Lespinasse Michel Lespinasse +Mickaël Salaün Miguel Ojeda Mike Rapoport Mike Rapoport