From 872c63fbf9e153146b07f0cece4da0d70b283eeb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 5 Sep 2016 11:56:05 +0100 Subject: [PATCH 1/2] arm64: spinlocks: implement smp_mb__before_spinlock() as smp_mb() smp_mb__before_spinlock() is intended to upgrade a spin_lock() operation to a full barrier, such that prior stores are ordered with respect to loads and stores occuring inside the critical section. Unfortunately, the core code defines the barrier as smp_wmb(), which is insufficient to provide the required ordering guarantees when used in conjunction with our load-acquire-based spinlock implementation. This patch overrides the arm64 definition of smp_mb__before_spinlock() to map to a full smp_mb(). Cc: Cc: Peter Zijlstra Reported-by: Alan Stern Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/spinlock.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index e875a5a551d7..89206b568cd4 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -363,4 +363,14 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() +/* + * Accesses appearing in program order before a spin_lock() operation + * can be reordered with accesses inside the critical section, by virtue + * of arch_spin_lock being constructed using acquire semantics. + * + * In cases where this is problematic (e.g. try_to_wake_up), an + * smp_mb__before_spinlock() can restore the required ordering. + */ +#define smp_mb__before_spinlock() smp_mb() + #endif /* __ASM_SPINLOCK_H */ From 2b9743441a312e0b0a2d87deae363eccbe9d0f00 Mon Sep 17 00:00:00 2001 From: Chunyan Zhang Date: Thu, 8 Sep 2016 20:46:42 +0800 Subject: [PATCH 2/2] arm64: use preempt_disable_notrace in _percpu_read/write When debug preempt or preempt tracer is enabled, preempt_count_add/sub() can be traced by function and function graph tracing, and preempt_disable/enable() would call preempt_count_add/sub(), so in Ftrace subsystem we should use preempt_disable/enable_notrace instead. In the commit 345ddcc882d8 ("ftrace: Have set_ftrace_pid use the bitmap like events do") the function this_cpu_read() was added to trace_graph_entry(), and if this_cpu_read() calls preempt_disable(), graph tracer will go into a recursive loop, even if the tracing_on is disabled. So this patch change to use preempt_enable/disable_notrace instead in this_cpu_read(). Since Yonghui Yang helped a lot to find the root cause of this problem, so also add his SOB. Signed-off-by: Yonghui Yang Signed-off-by: Chunyan Zhang Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/percpu.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/percpu.h b/arch/arm64/include/asm/percpu.h index 0a456bef8c79..2fee2f59288c 100644 --- a/arch/arm64/include/asm/percpu.h +++ b/arch/arm64/include/asm/percpu.h @@ -199,19 +199,19 @@ static inline unsigned long __percpu_xchg(void *ptr, unsigned long val, #define _percpu_read(pcp) \ ({ \ typeof(pcp) __retval; \ - preempt_disable(); \ + preempt_disable_notrace(); \ __retval = (typeof(pcp))__percpu_read(raw_cpu_ptr(&(pcp)), \ sizeof(pcp)); \ - preempt_enable(); \ + preempt_enable_notrace(); \ __retval; \ }) #define _percpu_write(pcp, val) \ do { \ - preempt_disable(); \ + preempt_disable_notrace(); \ __percpu_write(raw_cpu_ptr(&(pcp)), (unsigned long)(val), \ sizeof(pcp)); \ - preempt_enable(); \ + preempt_enable_notrace(); \ } while(0) \ #define _pcp_protect(operation, pcp, val) \