mirror of
https://github.com/torvalds/linux.git
synced 2026-03-08 03:24:45 +01:00
rcu: Add noinstr-fast rcu_read_{,un}lock_tasks_trace() APIs
When expressing RCU Tasks Trace in terms of SRCU-fast, it was
necessary to keep a nesting count and per-CPU srcu_ctr structure
pointer in the task_struct structure, which is slow to access.
But an alternative is to instead make rcu_read_lock_tasks_trace() and
rcu_read_unlock_tasks_trace(), which match the underlying SRCU-fast
semantics, avoiding the task_struct accesses.
When all callers have switched to the new API, the previous
rcu_read_lock_trace() and rcu_read_unlock_trace() APIs will be removed.
The rcu_read_{,un}lock_{,tasks_}trace() functions need to use smp_mb()
only if invoked where RCU is not watching, that is, from locations where
a call to rcu_is_watching() would return false. In architectures that
define the ARCH_WANTS_NO_INSTR Kconfig option, use of noinstr and friends
ensures that tracing happens only where RCU is watching, so those
architectures can dispense entirely with the read-side calls to smp_mb().
Other architectures include these read-side calls by default, but in many
installations there might be either larger than average tolerance for
risk, prohibition of removing tracing on a running system, or careful
review and approval of removal of tracing. Such installations can
build their kernels with CONFIG_TASKS_TRACE_RCU_NO_MB=y to avoid those
read-side calls to smp_mb(), thus accepting responsibility for run-time
removal of tracing from code regions that RCU is not watching.
Those wishing to disable read-side memory barriers for an entire
architecture can select this TASKS_TRACE_RCU_NO_MB Kconfig option,
hence the polarity.
[ paulmck: Apply Peter Zijlstra feedback. ]
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Andrii Nakryiko <andrii@kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: bpf@vger.kernel.org
Reviewed-by: Joel Fernandes <joelagnelf@nvidia.com>
Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
This commit is contained in:
parent
176a6aeaf1
commit
1a72f4bb6f
2 changed files with 80 additions and 8 deletions
|
|
@ -34,6 +34,53 @@ static inline int rcu_read_lock_trace_held(void)
|
|||
|
||||
#ifdef CONFIG_TASKS_TRACE_RCU
|
||||
|
||||
/**
|
||||
* rcu_read_lock_tasks_trace - mark beginning of RCU-trace read-side critical section
|
||||
*
|
||||
* When synchronize_rcu_tasks_trace() is invoked by one task, then that
|
||||
* task is guaranteed to block until all other tasks exit their read-side
|
||||
* critical sections. Similarly, if call_rcu_tasks_trace() is invoked on one
|
||||
* task while other tasks are within RCU read-side critical sections,
|
||||
* invocation of the corresponding RCU callback is deferred until after
|
||||
* all the other tasks exit their critical sections.
|
||||
*
|
||||
* For more details, please see the documentation for
|
||||
* srcu_read_lock_fast(). For a description of how implicit RCU
|
||||
* readers provide the needed ordering for architectures defining the
|
||||
* ARCH_WANTS_NO_INSTR Kconfig option (and thus promising never to trace
|
||||
* code where RCU is not watching), please see the __srcu_read_lock_fast()
|
||||
* (non-kerneldoc) header comment. Otherwise, the smp_mb() below provides
|
||||
* the needed ordering.
|
||||
*/
|
||||
static inline struct srcu_ctr __percpu *rcu_read_lock_tasks_trace(void)
|
||||
{
|
||||
struct srcu_ctr __percpu *ret = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
|
||||
|
||||
rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
|
||||
if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
|
||||
smp_mb(); // Provide ordering on noinstr-incomplete architectures.
|
||||
return ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_read_unlock_tasks_trace - mark end of RCU-trace read-side critical section
|
||||
* @scp: return value from corresponding rcu_read_lock_tasks_trace().
|
||||
*
|
||||
* Pairs with the preceding call to rcu_read_lock_tasks_trace() that
|
||||
* returned the value passed in via scp.
|
||||
*
|
||||
* For more details, please see the documentation for rcu_read_unlock().
|
||||
* For memory-ordering information, please see the header comment for the
|
||||
* rcu_read_lock_tasks_trace() function.
|
||||
*/
|
||||
static inline void rcu_read_unlock_tasks_trace(struct srcu_ctr __percpu *scp)
|
||||
{
|
||||
if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
|
||||
smp_mb(); // Provide ordering on noinstr-incomplete architectures.
|
||||
// Pairs with the __srcu_read_lock_fast() in rcu_read_lock_tasks_trace().
__srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
|
||||
// Releases the dep_map acquisition taken in rcu_read_lock_tasks_trace().
srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
|
||||
}
|
||||
|
||||
/**
|
||||
* rcu_read_lock_trace - mark beginning of RCU-trace read-side critical section
|
||||
*
|
||||
|
|
@ -50,14 +97,15 @@ static inline void rcu_read_lock_trace(void)
|
|||
{
|
||||
struct task_struct *t = current;
|
||||
|
||||
rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
|
||||
if (t->trc_reader_nesting++) {
|
||||
// In case we interrupted a Tasks Trace RCU reader.
|
||||
rcu_try_lock_acquire(&rcu_tasks_trace_srcu_struct.dep_map);
|
||||
return;
|
||||
}
|
||||
barrier(); // nesting before scp to protect against interrupt handler.
|
||||
t->trc_reader_scp = srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
|
||||
smp_mb(); // Placeholder for more selective ordering
|
||||
t->trc_reader_scp = __srcu_read_lock_fast(&rcu_tasks_trace_srcu_struct);
|
||||
if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
|
||||
smp_mb(); // Placeholder for more selective ordering
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -74,13 +122,14 @@ static inline void rcu_read_unlock_trace(void)
|
|||
struct srcu_ctr __percpu *scp;
|
||||
struct task_struct *t = current;
|
||||
|
||||
smp_mb(); // Placeholder for more selective ordering
|
||||
scp = t->trc_reader_scp;
|
||||
barrier(); // scp before nesting to protect against interrupt handler.
|
||||
if (!--t->trc_reader_nesting)
|
||||
srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
|
||||
else
|
||||
srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
|
||||
if (!--t->trc_reader_nesting) {
|
||||
if (!IS_ENABLED(CONFIG_TASKS_TRACE_RCU_NO_MB))
|
||||
smp_mb(); // Placeholder for more selective ordering
|
||||
__srcu_read_unlock_fast(&rcu_tasks_trace_srcu_struct, scp);
|
||||
}
|
||||
srcu_lock_release(&rcu_tasks_trace_srcu_struct.dep_map);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -142,6 +142,29 @@ config TASKS_TRACE_RCU
|
|||
default n
|
||||
select IRQ_WORK
|
||||
|
||||
config TASKS_TRACE_RCU_NO_MB
|
||||
bool "Override RCU Tasks Trace inclusion of read-side memory barriers"
|
||||
depends on RCU_EXPERT && TASKS_TRACE_RCU
|
||||
default ARCH_WANTS_NO_INSTR
|
||||
help
|
||||
This option prevents the use of read-side memory barriers in
|
||||
rcu_read_lock_tasks_trace() and rcu_read_unlock_tasks_trace()
|
||||
even in kernels built with CONFIG_ARCH_WANTS_NO_INSTR=n, that is,
|
||||
in kernels that do not have noinstr set up in entry/exit code.
|
||||
By setting this option, you are promising to carefully review
|
||||
use of ftrace, BPF, and friends to ensure that no tracing
|
||||
operation is attached to a function that runs in that portion
|
||||
of the entry/exit code that RCU does not watch, that is,
|
||||
where rcu_is_watching() returns false. Alternatively, you
|
||||
might choose to never remove traces except by rebooting.
|
||||
|
||||
Those wishing to disable read-side memory barriers for an entire
|
||||
architecture can select this Kconfig option, hence the polarity.
|
||||
|
||||
Say Y here if you need speed and will review use of tracing.
|
||||
Say N here for certain esoteric testing of RCU itself.
|
||||
Take the default if you are unsure.
|
||||
|
||||
config RCU_STALL_COMMON
|
||||
def_bool TREE_RCU
|
||||
help
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue