context_tracking: Convert state to atomic_t
Context tracking's state and dynticks counter are going to be merged into a single field so that both updates can happen atomically and at the same time. Prepare for that by converting the state into an atomic_t. [ paulmck: Apply kernel test robot feedback. ] Signed-off-by: Frederic Weisbecker <frederic@kernel.org> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Thomas Gleixner <tglx@linutronix.de> Cc: Neeraj Upadhyay <quic_neeraju@quicinc.com> Cc: Uladzislau Rezki <uladzislau.rezki@sony.com> Cc: Joel Fernandes <joel@joelfernandes.org> Cc: Boqun Feng <boqun.feng@gmail.com> Cc: Nicolas Saenz Julienne <nsaenz@kernel.org> Cc: Marcelo Tosatti <mtosatti@redhat.com> Cc: Xiongfeng Wang <wangxiongfeng2@huawei.com> Cc: Yu Liao <liaoyu15@huawei.com> Cc: Phil Auld <pauld@redhat.com> Cc: Paul Gortmaker <paul.gortmaker@windriver.com> Cc: Alex Belits <abelits@marvell.com> Signed-off-by: Paul E. McKenney <paulmck@kernel.org> Reviewed-by: Nicolas Saenz Julienne <nsaenzju@redhat.com> Tested-by: Nicolas Saenz Julienne <nsaenzju@redhat.com>
This commit is contained in:
committed by
Paul E. McKenney
parent
c33ef43a35
commit
171476775d
+94
-49
@@ -28,8 +28,8 @@ DEFINE_PER_CPU(struct context_tracking, context_tracking) = {
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_IDLE
|
||||
.dynticks_nesting = 1,
|
||||
.dynticks_nmi_nesting = DYNTICK_IRQ_NONIDLE,
|
||||
.dynticks = ATOMIC_INIT(1),
|
||||
#endif
|
||||
.state = ATOMIC_INIT(RCU_DYNTICKS_IDX),
|
||||
};
|
||||
EXPORT_SYMBOL_GPL(context_tracking);
|
||||
|
||||
@@ -76,7 +76,7 @@ static __always_inline void rcu_dynticks_task_trace_exit(void)
|
||||
* RCU is watching prior to the call to this function and is no longer
|
||||
* watching upon return.
|
||||
*/
|
||||
static noinstr void rcu_dynticks_eqs_enter(void)
|
||||
static noinstr void ct_kernel_exit_state(int offset)
|
||||
{
|
||||
int seq;
|
||||
|
||||
@@ -86,9 +86,9 @@ static noinstr void rcu_dynticks_eqs_enter(void)
|
||||
* next idle sojourn.
|
||||
*/
|
||||
rcu_dynticks_task_trace_enter(); // Before ->dynticks update!
|
||||
seq = rcu_dynticks_inc(1);
|
||||
seq = ct_state_inc(offset);
|
||||
// RCU is no longer watching. Better be in extended quiescent state!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & 0x1));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && (seq & RCU_DYNTICKS_IDX));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -96,7 +96,7 @@ static noinstr void rcu_dynticks_eqs_enter(void)
|
||||
* called from an extended quiescent state, that is, RCU is not watching
|
||||
* prior to the call to this function and is watching upon return.
|
||||
*/
|
||||
static noinstr void rcu_dynticks_eqs_exit(void)
|
||||
static noinstr void ct_kernel_enter_state(int offset)
|
||||
{
|
||||
int seq;
|
||||
|
||||
@@ -105,10 +105,10 @@ static noinstr void rcu_dynticks_eqs_exit(void)
|
||||
* and we also must force ordering with the next RCU read-side
|
||||
* critical section.
|
||||
*/
|
||||
seq = rcu_dynticks_inc(1);
|
||||
seq = ct_state_inc(offset);
|
||||
// RCU is now watching. Better not be in an extended quiescent state!
|
||||
rcu_dynticks_task_trace_exit(); // After ->dynticks update!
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & 0x1));
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(seq & RCU_DYNTICKS_IDX));
|
||||
}
|
||||
|
||||
/*
|
||||
@@ -119,7 +119,7 @@ static noinstr void rcu_dynticks_eqs_exit(void)
|
||||
* the possibility of usermode upcalls having messed up our count
|
||||
* of interrupt nesting level during the prior busy period.
|
||||
*/
|
||||
static void noinstr rcu_eqs_enter(bool user)
|
||||
static void noinstr ct_kernel_exit(bool user, int offset)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
|
||||
@@ -139,13 +139,13 @@ static void noinstr rcu_eqs_enter(bool user)
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
rcu_preempt_deferred_qs(current);
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
|
||||
instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
|
||||
// instrumentation for the noinstr ct_kernel_exit_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
|
||||
instrumentation_end();
|
||||
WRITE_ONCE(ct->dynticks_nesting, 0); /* Avoid irq-access tearing. */
|
||||
// RCU is watching here ...
|
||||
rcu_dynticks_eqs_enter();
|
||||
ct_kernel_exit_state(offset);
|
||||
// ... but is no longer watching here.
|
||||
rcu_dynticks_task_enter();
|
||||
}
|
||||
@@ -158,7 +158,7 @@ static void noinstr rcu_eqs_enter(bool user)
|
||||
* allow for the possibility of usermode upcalls messing up our count of
|
||||
* interrupt nesting level during the busy period that is just now starting.
|
||||
*/
|
||||
static void noinstr rcu_eqs_exit(bool user)
|
||||
static void noinstr ct_kernel_enter(bool user, int offset)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
long oldval;
|
||||
@@ -173,12 +173,12 @@ static void noinstr rcu_eqs_exit(bool user)
|
||||
}
|
||||
rcu_dynticks_task_exit();
|
||||
// RCU is not watching here ...
|
||||
rcu_dynticks_eqs_exit();
|
||||
ct_kernel_enter_state(offset);
|
||||
// ... but is watching here.
|
||||
instrumentation_begin();
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
|
||||
instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
|
||||
// instrumentation for the noinstr ct_kernel_enter_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
|
||||
trace_rcu_dyntick(TPS("End"), ct_dynticks_nesting(), 1, ct_dynticks());
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !user && !is_idle_task(current));
|
||||
@@ -192,7 +192,7 @@ static void noinstr rcu_eqs_exit(bool user)
|
||||
* ct_nmi_exit - inform RCU of exit from NMI context
|
||||
*
|
||||
* If we are returning from the outermost NMI handler that interrupted an
|
||||
* RCU-idle period, update ct->dynticks and ct->dynticks_nmi_nesting
|
||||
* RCU-idle period, update ct->state and ct->dynticks_nmi_nesting
|
||||
* to let the RCU grace-period handling know that the CPU is back to
|
||||
* being RCU-idle.
|
||||
*
|
||||
@@ -229,12 +229,12 @@ void noinstr ct_nmi_exit(void)
|
||||
trace_rcu_dyntick(TPS("Startirq"), ct_dynticks_nmi_nesting(), 0, ct_dynticks());
|
||||
WRITE_ONCE(ct->dynticks_nmi_nesting, 0); /* Avoid store tearing. */
|
||||
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_enter()
|
||||
instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
|
||||
// instrumentation for the noinstr ct_kernel_exit_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
instrumentation_end();
|
||||
|
||||
// RCU is watching here ...
|
||||
rcu_dynticks_eqs_enter();
|
||||
ct_kernel_exit_state(RCU_DYNTICKS_IDX);
|
||||
// ... but is no longer watching here.
|
||||
|
||||
if (!in_nmi())
|
||||
@@ -244,7 +244,7 @@ void noinstr ct_nmi_exit(void)
|
||||
/**
|
||||
* ct_nmi_enter - inform RCU of entry to NMI context
|
||||
*
|
||||
* If the CPU was idle from RCU's viewpoint, update ct->dynticks and
|
||||
* If the CPU was idle from RCU's viewpoint, update ct->state and
|
||||
* ct->dynticks_nmi_nesting to let the RCU grace-period handling know
|
||||
* that the CPU is active. This implementation permits nested NMIs, as
|
||||
* long as the nesting level does not overflow an int. (You will probably
|
||||
@@ -275,14 +275,14 @@ void noinstr ct_nmi_enter(void)
|
||||
rcu_dynticks_task_exit();
|
||||
|
||||
// RCU is not watching here ...
|
||||
rcu_dynticks_eqs_exit();
|
||||
ct_kernel_enter_state(RCU_DYNTICKS_IDX);
|
||||
// ... but is watching here.
|
||||
|
||||
instrumentation_begin();
|
||||
// instrumentation for the noinstr rcu_dynticks_curr_cpu_in_eqs()
|
||||
instrument_atomic_read(&ct->dynticks, sizeof(ct->dynticks));
|
||||
// instrumentation for the noinstr rcu_dynticks_eqs_exit()
|
||||
instrument_atomic_write(&ct->dynticks, sizeof(ct->dynticks));
|
||||
instrument_atomic_read(&ct->state, sizeof(ct->state));
|
||||
// instrumentation for the noinstr ct_kernel_enter_state()
|
||||
instrument_atomic_write(&ct->state, sizeof(ct->state));
|
||||
|
||||
incby = 1;
|
||||
} else if (!in_nmi()) {
|
||||
@@ -315,7 +315,7 @@ void noinstr ct_nmi_enter(void)
|
||||
void noinstr ct_idle_enter(void)
|
||||
{
|
||||
WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !raw_irqs_disabled());
|
||||
rcu_eqs_enter(false);
|
||||
ct_kernel_exit(false, RCU_DYNTICKS_IDX + CONTEXT_IDLE);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ct_idle_enter);
|
||||
|
||||
@@ -333,7 +333,7 @@ void noinstr ct_idle_exit(void)
|
||||
unsigned long flags;
|
||||
|
||||
raw_local_irq_save(flags);
|
||||
rcu_eqs_exit(false);
|
||||
ct_kernel_enter(false, RCU_DYNTICKS_IDX - CONTEXT_IDLE);
|
||||
raw_local_irq_restore(flags);
|
||||
}
|
||||
EXPORT_SYMBOL_GPL(ct_idle_exit);
|
||||
@@ -421,8 +421,8 @@ void ct_irq_exit_irqson(void)
|
||||
local_irq_restore(flags);
|
||||
}
|
||||
#else
|
||||
static __always_inline void rcu_eqs_enter(bool user) { }
|
||||
static __always_inline void rcu_eqs_exit(bool user) { }
|
||||
static __always_inline void ct_kernel_exit(bool user, int offset) { }
|
||||
static __always_inline void ct_kernel_enter(bool user, int offset) { }
|
||||
#endif /* #ifdef CONFIG_CONTEXT_TRACKING_IDLE */
|
||||
|
||||
#ifdef CONFIG_CONTEXT_TRACKING_USER
|
||||
@@ -463,6 +463,7 @@ static __always_inline void context_tracking_recursion_exit(void)
|
||||
*/
|
||||
void noinstr __ct_user_enter(enum ctx_state state)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
lockdep_assert_irqs_disabled();
|
||||
|
||||
/* Kernel threads aren't supposed to go to userspace */
|
||||
@@ -471,8 +472,8 @@ void noinstr __ct_user_enter(enum ctx_state state)
|
||||
if (!context_tracking_recursion_enter())
|
||||
return;
|
||||
|
||||
if ( __this_cpu_read(context_tracking.state) != state) {
|
||||
if (__this_cpu_read(context_tracking.active)) {
|
||||
if (__ct_state() != state) {
|
||||
if (ct->active) {
|
||||
/*
|
||||
* At this stage, only low level arch entry code remains and
|
||||
* then we'll run in userspace. We can assume there won't be
|
||||
@@ -492,28 +493,49 @@ void noinstr __ct_user_enter(enum ctx_state state)
|
||||
* that will fire and reschedule once we resume in user/guest mode.
|
||||
*/
|
||||
rcu_irq_work_resched();
|
||||
|
||||
/*
|
||||
* Enter RCU idle mode right before resuming userspace. No use of RCU
|
||||
* is permitted between this call and ct_kernel_enter(). This way the
|
||||
* CPU doesn't need to maintain the tick for RCU maintenance purposes
|
||||
* when the CPU runs in userspace.
|
||||
*/
|
||||
rcu_eqs_enter(true);
|
||||
ct_kernel_exit(true, RCU_DYNTICKS_IDX + state);
|
||||
|
||||
/*
|
||||
* Special case if we only track user <-> kernel transitions for tickless
|
||||
* cputime accounting but we don't support RCU extended quiescent state.
|
||||
* In this case we don't care about any concurrency/ordering.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
|
||||
atomic_set(&ct->state, state);
|
||||
} else {
|
||||
/*
|
||||
* Even if context tracking is disabled on this CPU, because it's outside
|
||||
* the full dynticks mask for example, we still have to keep track of the
|
||||
* context transitions and states to prevent inconsistency on those of
|
||||
* other CPUs.
|
||||
* If a task triggers an exception in userspace, sleep on the exception
|
||||
* handler and then migrate to another CPU, that new CPU must know where
|
||||
* the exception returns by the time we call exception_exit().
|
||||
* This information can only be provided by the previous CPU when it called
|
||||
* exception_enter().
|
||||
* OTOH we can spare the calls to vtime and RCU when context_tracking.active
|
||||
* is false because we know that CPU is not tickless.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
|
||||
/* Tracking for vtime only, no concurrent RCU EQS accounting */
|
||||
atomic_set(&ct->state, state);
|
||||
} else {
|
||||
/*
|
||||
* Tracking for vtime and RCU EQS. Make sure we don't race
|
||||
* with NMIs. OTOH we don't care about ordering here since
|
||||
* RCU only requires RCU_DYNTICKS_IDX increments to be fully
|
||||
* ordered.
|
||||
*/
|
||||
atomic_add(state, &ct->state);
|
||||
}
|
||||
}
|
||||
/*
|
||||
* Even if context tracking is disabled on this CPU, because it's outside
|
||||
* the full dynticks mask for example, we still have to keep track of the
|
||||
* context transitions and states to prevent inconsistency on those of
|
||||
* other CPUs.
|
||||
* If a task triggers an exception in userspace, sleep on the exception
|
||||
* handler and then migrate to another CPU, that new CPU must know where
|
||||
* the exception returns by the time we call exception_exit().
|
||||
* This information can only be provided by the previous CPU when it called
|
||||
* exception_enter().
|
||||
* OTOH we can spare the calls to vtime and RCU when context_tracking.active
|
||||
* is false because we know that CPU is not tickless.
|
||||
*/
|
||||
__this_cpu_write(context_tracking.state, state);
|
||||
}
|
||||
context_tracking_recursion_exit();
|
||||
}
|
||||
@@ -581,24 +603,47 @@ NOKPROBE_SYMBOL(user_enter_callable);
|
||||
*/
|
||||
void noinstr __ct_user_exit(enum ctx_state state)
|
||||
{
|
||||
struct context_tracking *ct = this_cpu_ptr(&context_tracking);
|
||||
|
||||
if (!context_tracking_recursion_enter())
|
||||
return;
|
||||
|
||||
if (__this_cpu_read(context_tracking.state) == state) {
|
||||
if (__this_cpu_read(context_tracking.active)) {
|
||||
if (__ct_state() == state) {
|
||||
if (ct->active) {
|
||||
/*
|
||||
* Exit RCU idle mode while entering the kernel because it can
|
||||
* run a RCU read side critical section anytime.
|
||||
*/
|
||||
rcu_eqs_exit(true);
|
||||
ct_kernel_enter(true, RCU_DYNTICKS_IDX - state);
|
||||
if (state == CONTEXT_USER) {
|
||||
instrumentation_begin();
|
||||
vtime_user_exit(current);
|
||||
trace_user_exit(0);
|
||||
instrumentation_end();
|
||||
}
|
||||
|
||||
/*
|
||||
* Special case if we only track user <-> kernel transitions for tickless
|
||||
* cputime accounting but we don't support RCU extended quiescent state.
|
||||
* In this case we don't care about any concurrency/ordering.
|
||||
*/
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE))
|
||||
atomic_set(&ct->state, CONTEXT_KERNEL);
|
||||
|
||||
} else {
|
||||
if (!IS_ENABLED(CONFIG_CONTEXT_TRACKING_IDLE)) {
|
||||
/* Tracking for vtime only, no concurrent RCU EQS accounting */
|
||||
atomic_set(&ct->state, CONTEXT_KERNEL);
|
||||
} else {
|
||||
/*
|
||||
* Tracking for vtime and RCU EQS. Make sure we don't race
|
||||
* with NMIs. OTOH we don't care about ordering here since
|
||||
* RCU only requires RCU_DYNTICKS_IDX increments to be fully
|
||||
* ordered.
|
||||
*/
|
||||
atomic_sub(state, &ct->state);
|
||||
}
|
||||
}
|
||||
__this_cpu_write(context_tracking.state, CONTEXT_KERNEL);
|
||||
}
|
||||
context_tracking_recursion_exit();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user