From 0539084639f3835c8d0b798e6659ec14a266b4f1 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 20 Mar 2024 09:30:40 +0100
Subject: [PATCH 01/11] x86/percpu: Convert this_percpu_xchg_op() from asm()
 to C code, to generate better code

Rewrite percpu_xchg_op() using generic percpu primitives instead of
using asm. The new implementation is similar to local_xchg() and allows
the compiler to perform various optimizations: e.g. the compiler is
able to create a fast path through the loop, according to the
likely/unlikely annotations in percpu_try_cmpxchg_op().

No functional changes intended.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: "H. Peter Anvin"
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20240320083127.493250-1-ubizjak@gmail.com
---
 arch/x86/include/asm/percpu.h | 32 +++++++++++---------------------
 1 file changed, 11 insertions(+), 21 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 44958ebaf626..de991e6d050a 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -230,25 +230,15 @@ do { \
 })
 
 /*
- * xchg is implemented using cmpxchg without a lock prefix. xchg is
- * expensive due to the implied lock prefix. The processor cannot prefetch
- * cachelines if xchg is used.
+ * this_cpu_xchg() is implemented using cmpxchg without a lock prefix.
+ * xchg is expensive due to the implied lock prefix. The processor
+ * cannot prefetch cachelines if xchg is used.
  */
-#define percpu_xchg_op(size, qual, _var, _nval) \
+#define this_percpu_xchg_op(_var, _nval) \
 ({ \
-	__pcpu_type_##size pxo_old__; \
-	__pcpu_type_##size pxo_new__ = __pcpu_cast_##size(_nval); \
-	asm qual (__pcpu_op2_##size("mov", __percpu_arg([var]), \
-				    "%[oval]") \
-		  "\n1:\t" \
-		  __pcpu_op2_##size("cmpxchg", "%[nval]", \
-				    __percpu_arg([var])) \
-		  "\n\tjnz 1b" \
-		  : [oval] "=&a" (pxo_old__), \
-		    [var] "+m" (__my_cpu_var(_var)) \
-		  : [nval] __pcpu_reg_##size(, pxo_new__) \
-		  : "memory"); \
-	(typeof(_var))(unsigned long) pxo_old__; \
+	typeof(_var) pxo_old__ = this_cpu_read(_var); \
+	do { } while (!this_cpu_try_cmpxchg(_var, &pxo_old__, _nval)); \
+	pxo_old__; \
 })
 
 /*
@@ -534,9 +524,9 @@ do { \
 #define this_cpu_or_1(pcp, val) percpu_to_op(1, volatile, "or", (pcp), val)
 #define this_cpu_or_2(pcp, val) percpu_to_op(2, volatile, "or", (pcp), val)
 #define this_cpu_or_4(pcp, val) percpu_to_op(4, volatile, "or", (pcp), val)
-#define this_cpu_xchg_1(pcp, nval) percpu_xchg_op(1, volatile, pcp, nval)
-#define this_cpu_xchg_2(pcp, nval) percpu_xchg_op(2, volatile, pcp, nval)
-#define this_cpu_xchg_4(pcp, nval) percpu_xchg_op(4, volatile, pcp, nval)
+#define this_cpu_xchg_1(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_2(pcp, nval) this_percpu_xchg_op(pcp, nval)
+#define this_cpu_xchg_4(pcp, nval) this_percpu_xchg_op(pcp, nval)
 
 #define raw_cpu_add_return_1(pcp, val) percpu_add_return_op(1, , pcp, val)
 #define raw_cpu_add_return_2(pcp, val) percpu_add_return_op(2, , pcp, val)
@@ -575,7 +565,7 @@ do { \
 #define this_cpu_and_8(pcp, val) percpu_to_op(8, volatile, "and", (pcp), val)
 #define this_cpu_or_8(pcp, val) percpu_to_op(8, volatile, "or", (pcp), val)
 #define this_cpu_add_return_8(pcp, val) percpu_add_return_op(8, volatile, pcp, val)
-#define this_cpu_xchg_8(pcp, nval) percpu_xchg_op(8, volatile, pcp, nval)
+#define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
 #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
 #endif
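[ For illustration, the C pattern that this_percpu_xchg_op() relies on
  can be modelled in plain user-space C. This is a minimal sketch, not
  kernel code: __atomic_compare_exchange_n() stands in for the kernel's
  this_cpu_try_cmpxchg(), and a plain global stands in for the percpu
  variable. ]

	#include <stdbool.h>

	static unsigned long slot;		/* percpu variable stand-in */

	static bool try_cmpxchg(unsigned long *p, unsigned long *old,
				unsigned long new)
	{
		/* On failure, *old is updated with the current value. */
		return __atomic_compare_exchange_n(p, old, new, false,
						   __ATOMIC_RELAXED,
						   __ATOMIC_RELAXED);
	}

	static unsigned long xchg_via_try_cmpxchg(unsigned long nval)
	{
		unsigned long old = slot;	/* this_cpu_read() */

		/* Usually a single cmpxchg - the loop is the slow path. */
		do { } while (!try_cmpxchg(&slot, &old, nval));

		return old;
	}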
From ce99b9c8daff3352a2ae0c72acf44e0663095fea Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 20 Mar 2024 09:30:41 +0100
Subject: [PATCH 02/11] x86/percpu: Move raw_percpu_xchg_op() to a better
 place

Move raw_percpu_xchg_op() together with this_percpu_xchg_op() and
trivially rename some internal variables to harmonize them between
the two macro implementations.

No functional changes intended.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: "H. Peter Anvin"
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20240320083127.493250-2-ubizjak@gmail.com
---
 arch/x86/include/asm/percpu.h | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index de991e6d050a..7563e69838c4 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -229,6 +229,17 @@ do { \
 	(typeof(_var))(unsigned long) (paro_tmp__ + _val); \
 })
 
+/*
+ * raw_cpu_xchg() can use a load-store since
+ * it is not required to be IRQ-safe.
+ */
+#define raw_percpu_xchg_op(_var, _nval) \
+({ \
+	typeof(_var) pxo_old__ = raw_cpu_read(_var); \
+	raw_cpu_write(_var, _nval); \
+	pxo_old__; \
+})
+
 /*
  * this_cpu_xchg() is implemented using cmpxchg without a lock prefix.
  * xchg is expensive due to the implied lock prefix. The processor
@@ -499,18 +510,6 @@ do { \
 #define raw_cpu_or_1(pcp, val) percpu_to_op(1, , "or", (pcp), val)
 #define raw_cpu_or_2(pcp, val) percpu_to_op(2, , "or", (pcp), val)
 #define raw_cpu_or_4(pcp, val) percpu_to_op(4, , "or", (pcp), val)
-
-/*
- * raw_cpu_xchg() can use a load-store since it is not required to be
- * IRQ-safe.
- */
-#define raw_percpu_xchg_op(var, nval) \
-({ \
-	typeof(var) pxo_ret__ = raw_cpu_read(var); \
-	raw_cpu_write(var, (nval)); \
-	pxo_ret__; \
-})
-
 #define raw_cpu_xchg_1(pcp, val) raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_2(pcp, val) raw_percpu_xchg_op(pcp, val)
 #define raw_cpu_xchg_4(pcp, val) raw_percpu_xchg_op(pcp, val)

From f61f02d1ff788ae5ad485ef8edd88d9c93557994 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Wed, 20 Mar 2024 13:45:49 +0100
Subject: [PATCH 03/11] x86/percpu: Re-enable named address spaces with KASAN
 for GCC 13.3+

Commit:

  68fb3ca0e408 ("x86/percpu: Disable named address spaces for KASAN")

... disabled support for named address spaces with KASAN due to an
incompatibility between named address spaces and KASAN.

GCC 13.3 has fixed this issue (GCC PR sanitizer/111736), so support
for named address spaces can be re-enabled with KASAN for GCC versions
13.3 and later.

Note that the patch considers GCC 14 to be fixed - if somebody is
using snapshots of GCC 14 from before the fix, they should upgrade.
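[ For context, the affected construct is a named-address-space percpu
  access; a minimal sketch of one, using GCC's __seg_gs qualifier
  directly rather than the kernel's percpu wrappers - illustrative
  only. ]

	typedef __seg_gs unsigned long gs_ulong;

	unsigned long read_percpu_slot(gs_ulong *slot)
	{
		/*
		 * A %gs-relative load: its address is not a flat pointer,
		 * so sanitizers must not apply a flat-address shadow check
		 * to it. GCC < 13.3 got this wrong (PR sanitizer/111736).
		 */
		return *slot;
	}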
Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Andy Lutomirski
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Ard Biesheuvel
Cc: Jakub Jelinek
Cc: Nick Desaulniers
Cc: Sean Christopherson
Link: https://lore.kernel.org/r/20240320124603.566923-1-ubizjak@gmail.com
---
 arch/x86/Kconfig | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7aed87cbf386..09455d93b947 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2435,14 +2435,17 @@ endmenu
 config CC_HAS_NAMED_AS
 	def_bool CC_IS_GCC && GCC_VERSION >= 120100
 
+config CC_HAS_NAMED_AS_FIXED_ASAN
+	def_bool CC_IS_GCC && GCC_VERSION >= 130300
+
 config USE_X86_SEG_SUPPORT
 	def_bool y
 	depends on CC_HAS_NAMED_AS
 	#
-	# -fsanitize=kernel-address (KASAN) is at the moment incompatible
-	# with named address spaces - see GCC PR sanitizer/111736.
+	# -fsanitize=kernel-address (KASAN) is incompatible with named
+	# address spaces with GCC < 13.3 - see GCC PR sanitizer/111736.
 	#
-	depends on !KASAN
+	depends on !KASAN || CC_HAS_NAMED_AS_FIXED_ASAN
 
 config CC_HAS_SLS
 	def_bool $(cc-option,-mharden-sls=all)

From b429eafe0d9f765d8626e53221ce3108b783da5e Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Thu, 21 Mar 2024 17:46:41 +0100
Subject: [PATCH 04/11] x86/percpu: Enable named address spaces for GCC 9.1+

Enable named address spaces also for the GCC 9, GCC 10 and GCC 11
releases. These compilers all produce kernel images that boot without
problems.

The GCC_VERSION cutoff is arbitrary: it is primarily a risk-reduction
cutoff. Older compilers will be tested and enabled in the Linux 6.10
release.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Andy Lutomirski
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Ard Biesheuvel
Cc: Kees Cook
Link: https://lore.kernel.org/r/20240321164647.289879-1-ubizjak@gmail.com
---
 arch/x86/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 09455d93b947..03c9d1100f09 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2433,7 +2433,7 @@ source "kernel/livepatch/Kconfig"
 endmenu
 
 config CC_HAS_NAMED_AS
-	def_bool CC_IS_GCC && GCC_VERSION >= 120100
+	def_bool CC_IS_GCC && GCC_VERSION >= 90100
 
 config CC_HAS_NAMED_AS_FIXED_ASAN
 	def_bool CC_IS_GCC && GCC_VERSION >= 130300

From 4e5b0e8003df05983b6dabcdde7ff447d53b49d7 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Fri, 22 Mar 2024 11:27:14 +0100
Subject: [PATCH 05/11] x86/percpu: Unify arch_raw_cpu_ptr() defines

When building a 32-bit vDSO for a 64-bit kernel, games are played with
CONFIG_X86_64. The {this,raw}_cpu_read_8() macros are conditionally
defined on CONFIG_X86_64, and when CONFIG_X86_64 is undefined in
fake_32bit_build.h, various build failures in generic percpu header
files can happen. To make things worse, the build of the 32-bit vDSO
for a 64-bit kernel grew a dependency on the arch_raw_cpu_ptr() macro,
and the build fails if that macro is not defined.

To mitigate these issues, x86 carefully defines arch_raw_cpu_ptr() to
avoid any dependency on raw_cpu_read_8() and thus on CONFIG_X86_64.
Without segment register support, the definition uses the
size-agnostic MOV asm mnemonic and hopes that the _ptr argument will
never be 64 bits in size on 32-bit targets (although newer GCCs warn
about this situation with "unsupported size for integer register");
with segment register support, the definition uses the size-agnostic
__raw_cpu_read() macro.

Fortunately, raw_cpu_read() is not used in the 32-bit vDSO for a
64-bit kernel.
However, we can't simply omit the definition of arch_raw_cpu_ptr(),
since the build will fail when building vdso/vdso32/vclock_gettime.o.
The patch defines arch_raw_cpu_ptr() to BUILD_BUG() when the
BUILD_VDSO32_64 macro is defined. This way, we are sure that
arch_raw_cpu_ptr() won't actually be used in the 32-bit vDSO for a
64-bit kernel, but it is still defined to prevent a build failure.

Finally, we can unify arch_raw_cpu_ptr() between builds with and
without x86 segment register support, substituting two tricky macro
definitions with a straightforward implementation.

There is no size difference and no difference in the number of
this_cpu_off accesses between the patched and unpatched kernel when
the kernel is built either with or without segment register support.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Andy Lutomirski
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20240322102730.209141-1-ubizjak@gmail.com
---
 arch/x86/include/asm/percpu.h | 42 +++++++++++++++--------------------
 1 file changed, 18 insertions(+), 24 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 7563e69838c4..f6ddbaaf80bc 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -59,36 +59,30 @@
 #define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset this_cpu_read(this_cpu_off)
 
-#ifdef CONFIG_USE_X86_SEG_SUPPORT
-/*
- * Efficient implementation for cases in which the compiler supports
- * named address spaces. Allows the compiler to perform additional
- * optimizations that can save more instructions.
- */
-#define arch_raw_cpu_ptr(ptr) \
-({ \
-	unsigned long tcp_ptr__; \
-	tcp_ptr__ = __raw_cpu_read(, this_cpu_off); \
- \
-	tcp_ptr__ += (unsigned long)(ptr); \
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
-})
-#else /* CONFIG_USE_X86_SEG_SUPPORT */
+#ifdef CONFIG_X86_64
+#define __raw_my_cpu_offset raw_cpu_read_8(this_cpu_off);
+#else
+#define __raw_my_cpu_offset raw_cpu_read_4(this_cpu_off);
+#endif
+
 /*
  * Compared to the generic __my_cpu_offset version, the following
  * saves one instruction and avoids clobbering a temp register.
+ *
+ * arch_raw_cpu_ptr should not be used in 32-bit VDSO for a 64-bit
+ * kernel, because games are played with CONFIG_X86_64 there and
+ * sizeof(this_cpu_off) becomes 4.
  */
-#define arch_raw_cpu_ptr(ptr) \
+#ifndef BUILD_VDSO32_64
+#define arch_raw_cpu_ptr(_ptr) \
 ({ \
-	unsigned long tcp_ptr__; \
-	asm ("mov " __percpu_arg(1) ", %0" \
-	     : "=r" (tcp_ptr__) \
-	     : "m" (__my_cpu_var(this_cpu_off))); \
- \
-	tcp_ptr__ += (unsigned long)(ptr); \
-	(typeof(*(ptr)) __kernel __force *)tcp_ptr__; \
+	unsigned long tcp_ptr__ = __raw_my_cpu_offset; \
+	tcp_ptr__ += (unsigned long)(_ptr); \
+	(typeof(*(_ptr)) __kernel __force *)tcp_ptr__; \
 })
-#endif /* CONFIG_USE_X86_SEG_SUPPORT */
+#else
+#define arch_raw_cpu_ptr(_ptr) ({ BUILD_BUG(); (typeof(_ptr))0; })
+#endif
 
 #define PER_CPU_VAR(var) %__percpu_seg:(var)__percpu_rel

From b90169b42a6f49ff2fe2e4d4ed0bbcf17fb8f1bf Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Sun, 24 Mar 2024 22:19:45 +0100
Subject: [PATCH 06/11] x86/percpu: Do not use this_cpu_read_stable_8() for
 32-bit targets

The this_cpu_read_stable() macro uses __pcpu_size_call_return(), which
unconditionally calls this_cpu_read_stable_8() also for 32-bit targets.
This usage is invalid, as it will result in the generation of a 64-bit
MOVQ instruction on 32-bit targets via the percpu_stable_op() macro.

Since there is no generic support for this_cpu_read_stable_8() on
32-bit targets, the patch defines this_cpu_read_stable_8() to
BUILD_BUG() when CONFIG_X86_64 is not defined. This way, we are sure
that this_cpu_read_stable_8() won't actually be used for 32-bit
targets, but it is still defined to prevent a build failure.
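[ The "defined, but BUILD_BUG() on use" idiom can be modelled with
  GCC's error attribute, which is also what the kernel's BUILD_BUG()
  machinery ultimately boils down to - a sketch with a hypothetical
  helper name: ]

	extern void build_bug(void)
		__attribute__((error("this_cpu_read_stable_8() used on 32-bit")));

	/* Always defined, so generic code can reference it ... */
	#define this_cpu_read_stable_8(pcp) ({ build_bug(); (typeof(pcp))0; })

	/* ... but any reachable use of it fails at compile time. */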
Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Andy Lutomirski
Cc: Josh Poimboeuf
Cc: Linus Torvalds
Cc: Ard Biesheuvel
Link: https://lore.kernel.org/r/20240324212014.310189-1-ubizjak@gmail.com
---
 arch/x86/include/asm/percpu.h | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index f6ddbaaf80bc..1f6404e0c428 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -423,10 +423,6 @@ do { \
  * actually per-thread variables implemented as per-CPU variables and
  * thus stable for the duration of the respective task.
  */
-#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
-#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
-#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp)
-#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
 #define this_cpu_read_stable(pcp) __pcpu_size_call_return(this_cpu_read_stable_, pcp)
 
 #ifdef CONFIG_USE_X86_SEG_SUPPORT
@@ -495,6 +491,10 @@ do { \
 #define this_cpu_read_const(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
 #endif /* CONFIG_USE_X86_SEG_SUPPORT */
 
+#define this_cpu_read_stable_1(pcp) percpu_stable_op(1, "mov", pcp)
+#define this_cpu_read_stable_2(pcp) percpu_stable_op(2, "mov", pcp)
+#define this_cpu_read_stable_4(pcp) percpu_stable_op(4, "mov", pcp)
+
 #define raw_cpu_add_1(pcp, val) percpu_add_op(1, , (pcp), val)
 #define raw_cpu_add_2(pcp, val) percpu_add_op(2, , (pcp), val)
 #define raw_cpu_add_4(pcp, val) percpu_add_op(4, , (pcp), val)
@@ -546,6 +546,8 @@ do { \
  * 32 bit must fall back to generic operations.
  */
 #ifdef CONFIG_X86_64
+#define this_cpu_read_stable_8(pcp) percpu_stable_op(8, "mov", pcp)
+
 #define raw_cpu_add_8(pcp, val) percpu_add_op(8, , (pcp), val)
 #define raw_cpu_and_8(pcp, val) percpu_to_op(8, , "and", (pcp), val)
 #define raw_cpu_or_8(pcp, val) percpu_to_op(8, , "or", (pcp), val)
@@ -561,6 +563,9 @@ do { \
 #define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
 #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
+#else
+/* There is no generic 64 bit read stable operation for 32 bit targets. */
+#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
 #endif
 
 static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,

From a55c1fdad5f61b4bfe42319694b23671a758cb28 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Tue, 2 Apr 2024 19:50:38 +0200
Subject: [PATCH 07/11] x86/percpu: Use __force to cast from __percpu address
 space

Fix a Sparse warning when casting from the __percpu address space by
using __force in the cast.

x86 named address spaces are not considered to be subspaces of the
generic (flat) address space, so explicit casts are required to
convert pointers between these address spaces and the generic address
space (the application should cast to uintptr_t and apply the segment
base offset).
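[ In user space, the conversion described above looks roughly like
  this - a sketch using GCC's __seg_fs named address space, where the
  segment base is assumed to be known (e.g. via
  arch_prctl(ARCH_GET_FS)): ]

	#include <stdint.h>

	extern uintptr_t fs_base;	/* assumed segment base */

	int *to_flat_pointer(__seg_fs int *p)
	{
		/* Cast to uintptr_t, then apply the segment base offset. */
		return (int *)(fs_base + (uintptr_t)p);
	}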
The cast to uintptr_t removes the __percpu address space tag and
Sparse reports:

  warning: cast removes address space '__percpu' of expression

Use __force to inform Sparse that the cast is intentional.

Fixes: 9a462b9eafa6 ("x86/percpu: Use compiler segment prefix qualifier")
Reported-by: Charlemagne Lasse
Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20240402175058.52649-1-ubizjak@gmail.com
Closes: https://lore.kernel.org/lkml/CAFGhKbzev7W4aHwhFPWwMZQEHenVgZUj7=aunFieVqZg3mt14A@mail.gmail.com/
---
 arch/x86/include/asm/percpu.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 1f6404e0c428..20696df5d567 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -77,7 +77,7 @@
 #define arch_raw_cpu_ptr(_ptr) \
 ({ \
 	unsigned long tcp_ptr__ = __raw_my_cpu_offset; \
-	tcp_ptr__ += (unsigned long)(_ptr); \
+	tcp_ptr__ += (__force unsigned long)(_ptr); \
 	(typeof(*(_ptr)) __kernel __force *)tcp_ptr__; \
 })
 #else
@@ -96,8 +96,8 @@
 #endif /* CONFIG_SMP */
 
 #define __my_cpu_type(var) typeof(var) __percpu_seg_override
-#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr) *)(uintptr_t)(ptr)
-#define __my_cpu_var(var) (*__my_cpu_ptr(&var))
+#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr)*)(__force uintptr_t)(ptr)
+#define __my_cpu_var(var) (*__my_cpu_ptr(&(var)))
 #define __percpu_arg(x) __percpu_prefix "%" #x
 #define __force_percpu_arg(x) __force_percpu_prefix "%" #x

From 9ebe5500d4b25ee4cde04eec59a6764361a60709 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Tue, 2 Apr 2024 14:19:08 +0200
Subject: [PATCH 08/11] x86/percpu: Re-enable named address spaces with
 sanitizers for GCC 13.3+

Commit:

  b6540de9b5c8 ("x86/percpu: Disable named address spaces for KCSAN")

... disabled support for named address spaces with KCSAN due to an
incompatibility between named address spaces and KCSAN.

GCC 13.3 has fixed this issue (GCC PR sanitizer/111736), so support
for named address spaces can be re-enabled with the sanitizers for GCC
versions 13.3 and later.

[ Note that the patch considers GCC 14 to be fixed - if somebody is
  using snapshots of GCC 14 from before the fix, they should
  upgrade. ]

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20240402121926.78477-1-ubizjak@gmail.com
---
 arch/x86/Kconfig | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1c020d21617..9332badb48ea 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2431,19 +2431,18 @@ endmenu
 config CC_HAS_NAMED_AS
 	def_bool CC_IS_GCC && GCC_VERSION >= 90100
 
-config CC_HAS_NAMED_AS_FIXED_ASAN
+config CC_HAS_NAMED_AS_FIXED_SANITIZERS
 	def_bool CC_IS_GCC && GCC_VERSION >= 130300
 
 config USE_X86_SEG_SUPPORT
 	def_bool y
 	depends on CC_HAS_NAMED_AS
 	#
-	# -fsanitize=kernel-address (KASAN) is incompatible with named
-	# address spaces with GCC < 13.3 - see GCC PR sanitizer/111736.
+	# -fsanitize=kernel-address (KASAN) and -fsanitize=thread
+	# (KCSAN) are incompatible with named address spaces with
+	# GCC < 13.3 - see GCC PR sanitizer/111736.
 	#
-	depends on !KASAN || CC_HAS_NAMED_AS_FIXED_ASAN
-	# -fsanitize=thread (KCSAN) is also incompatible.
-	depends on !KCSAN
+	depends on !(KASAN || KCSAN) || CC_HAS_NAMED_AS_FIXED_SANITIZERS
 
 config CC_HAS_SLS
 	def_bool $(cc-option,-mharden-sls=all)

From 4c3677c077582f8665806def3f6dd35587793c69 Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Thu, 4 Apr 2024 11:42:01 +0200
Subject: [PATCH 09/11] x86/percpu: Fix x86_this_cpu_variable_test_bit() asm
 template

Fix x86_this_cpu_variable_test_bit(), which is implemented with an
incorrect asm template, where argument 2 (the count argument) is
considered a percpu variable. However, x86_this_cpu_test_bit() is
currently used exclusively with a constant bit-number argument, so the
called x86_this_cpu_variable_test_bit() function is never instantiated.

The fix introduces named assembler operands to prevent this kind of
error.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Linus Torvalds
Cc: "H. Peter Anvin"
Link: https://lore.kernel.org/r/20240404094218.448963-1-ubizjak@gmail.com
---
 arch/x86/include/asm/percpu.h | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 20696df5d567..cbfbbe836ee2 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -586,10 +586,11 @@ static inline bool x86_this_cpu_variable_test_bit(int nr,
 {
 	bool oldbit;
 
-	asm volatile("btl "__percpu_arg(2)",%1"
+	asm volatile("btl %[nr], " __percpu_arg([var])
 		     CC_SET(c)
 		     : CC_OUT(c) (oldbit)
-		     : "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))), "Ir" (nr));
+		     : [var] "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))),
+		       [nr] "Ir" (nr));
 
 	return oldbit;
 }

From a3f8a3a2cf0b7b3ccb51ee60d51c0b5435c7135a Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Thu, 4 Apr 2024 11:42:02 +0200
Subject: [PATCH 10/11] x86/percpu: Rewrite x86_this_cpu_test_bit() and
 friends as macros

Rewrite the whole family of x86_this_cpu_test_bit() functions as
macros, so the standard __my_cpu_var() and raw_cpu_read() macros can
be used on percpu variables. This approach considerably simplifies the
implementation of the functions and also introduces standard checks on
the accessed percpu variables.

No functional changes intended.

Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20240404094218.448963-2-ubizjak@gmail.com
---
 arch/um/include/asm/cpufeature.h  |  3 +-
 arch/x86/include/asm/cpufeature.h |  3 +-
 arch/x86/include/asm/percpu.h     | 52 +++++++++++++------------------
 3 files changed, 24 insertions(+), 34 deletions(-)

diff --git a/arch/um/include/asm/cpufeature.h b/arch/um/include/asm/cpufeature.h
index 66fe06db872f..1eb8b834fbec 100644
--- a/arch/um/include/asm/cpufeature.h
+++ b/arch/um/include/asm/cpufeature.h
@@ -38,8 +38,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 
 #define this_cpu_has(bit) \
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
-	 x86_this_cpu_test_bit(bit, \
-			       (unsigned long __percpu *)&cpu_info.x86_capability))
+	 x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
 
 /*
  * This macro is for detection of features which need kernel
diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index 42157ddcc09d..cd4c02dc9c8b 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -127,8 +127,7 @@ extern const char * const x86_bug_flags[NBUGINTS*32];
 
 #define this_cpu_has(bit) \
 	(__builtin_constant_p(bit) && REQUIRED_MASK_BIT_SET(bit) ? 1 : \
-	 x86_this_cpu_test_bit(bit, \
-			       (unsigned long __percpu *)&cpu_info.x86_capability))
+	 x86_this_cpu_test_bit(bit, cpu_info.x86_capability))
 
 /*
  * This macro is for detection of features which need kernel
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index cbfbbe836ee2..d6ff0db32209 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -96,7 +96,7 @@
 #endif /* CONFIG_SMP */
 
 #define __my_cpu_type(var) typeof(var) __percpu_seg_override
-#define __my_cpu_ptr(ptr) (__my_cpu_type(*ptr)*)(__force uintptr_t)(ptr)
+#define __my_cpu_ptr(ptr) (__my_cpu_type(*(ptr))*)(__force uintptr_t)(ptr)
 #define __my_cpu_var(var) (*__my_cpu_ptr(&(var)))
 #define __percpu_arg(x) __percpu_prefix "%" #x
 #define __force_percpu_arg(x) __force_percpu_prefix "%" #x
@@ -568,37 +568,29 @@ do { \
 #define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
 #endif
 
-static __always_inline bool x86_this_cpu_constant_test_bit(unsigned int nr,
-		const unsigned long __percpu *addr)
-{
-	unsigned long __percpu *a =
-		(unsigned long __percpu *)addr + nr / BITS_PER_LONG;
+#define x86_this_cpu_constant_test_bit(_nr, _var) \
+({ \
+	unsigned long __percpu *addr__ = \
+		(unsigned long __percpu *)&(_var) + ((_nr) / BITS_PER_LONG); \
+	!!((1UL << ((_nr) % BITS_PER_LONG)) & raw_cpu_read(*addr__)); \
+})
 
-#ifdef CONFIG_X86_64
-	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_8(*a)) != 0;
-#else
-	return ((1UL << (nr % BITS_PER_LONG)) & raw_cpu_read_4(*a)) != 0;
-#endif
-}
+#define x86_this_cpu_variable_test_bit(_nr, _var) \
+({ \
+	bool oldbit; \
+ \
+	asm volatile("btl %[nr], " __percpu_arg([var]) \
+		     CC_SET(c) \
+		     : CC_OUT(c) (oldbit) \
+		     : [var] "m" (__my_cpu_var(_var)), \
+		       [nr] "rI" (_nr)); \
+	oldbit; \
+})
 
-static inline bool x86_this_cpu_variable_test_bit(int nr,
-		const unsigned long __percpu *addr)
-{
-	bool oldbit;
-
-	asm volatile("btl %[nr], " __percpu_arg([var])
-		     CC_SET(c)
-		     : CC_OUT(c) (oldbit)
-		     : [var] "m" (*__my_cpu_ptr((unsigned long __percpu *)(addr))),
-		       [nr] "Ir" (nr));
-
-	return oldbit;
-}
-
-#define x86_this_cpu_test_bit(nr, addr) \
-	(__builtin_constant_p((nr)) \
-	 ? x86_this_cpu_constant_test_bit((nr), (addr)) \
-	 : x86_this_cpu_variable_test_bit((nr), (addr)))
+#define x86_this_cpu_test_bit(_nr, _var) \
+	(__builtin_constant_p(_nr) \
+	 ? x86_this_cpu_constant_test_bit(_nr, _var) \
+	 : x86_this_cpu_variable_test_bit(_nr, _var))
 
 #include <asm-generic/percpu.h>

From 93cfa544cf9e4771def159002304a2e366cd97af Mon Sep 17 00:00:00 2001
From: Uros Bizjak
Date: Thu, 4 Apr 2024 11:42:03 +0200
Subject: [PATCH 11/11] x86/percpu: Introduce raw_cpu_read_long() to reduce
 ifdeffery

Introduce the raw_cpu_read_long() macro to slightly reduce ifdeffery
in <asm/percpu.h>.

No functional changes intended.
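[ The idea in miniature: one helper makes the word-size decision, so
  users of it need no #ifdef of their own. A sketch with hypothetical
  read_4()/read_8() stand-ins for the size-specific percpu reads: ]

	/* Hypothetical stand-ins for the size-specific reads: */
	#define read_4(var) (*(const volatile unsigned int *)&(var))
	#define read_8(var) (*(const volatile unsigned long long *)&(var))

	/* The word size is decided in exactly one place ... */
	#ifdef __LP64__			/* stand-in for CONFIG_X86_64 */
	# define read_long(var) read_8(var)
	#else
	# define read_long(var) read_4(var)
	#endif

	/* ... so every user of read_long() stays ifdef-free. */
	unsigned long load(unsigned long *p) { return read_long(*p); }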
Signed-off-by: Uros Bizjak
Signed-off-by: Ingo Molnar
Cc: Linus Torvalds
Link: https://lore.kernel.org/r/20240404094218.448963-3-ubizjak@gmail.com
---
 arch/x86/include/asm/percpu.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index d6ff0db32209..3bedee1801e2 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -59,12 +59,6 @@
 #define __force_percpu_prefix "%%"__stringify(__percpu_seg)":"
 #define __my_cpu_offset this_cpu_read(this_cpu_off)
 
-#ifdef CONFIG_X86_64
-#define __raw_my_cpu_offset raw_cpu_read_8(this_cpu_off);
-#else
-#define __raw_my_cpu_offset raw_cpu_read_4(this_cpu_off);
-#endif
-
 /*
  * Compared to the generic __my_cpu_offset version, the following
  * saves one instruction and avoids clobbering a temp register.
@@ -76,7 +70,7 @@
 #ifndef BUILD_VDSO32_64
 #define arch_raw_cpu_ptr(_ptr) \
 ({ \
-	unsigned long tcp_ptr__ = __raw_my_cpu_offset; \
+	unsigned long tcp_ptr__ = raw_cpu_read_long(this_cpu_off); \
 	tcp_ptr__ += (__force unsigned long)(_ptr); \
 	(typeof(*(_ptr)) __kernel __force *)tcp_ptr__; \
 })
@@ -563,9 +557,13 @@ do { \
 #define this_cpu_xchg_8(pcp, nval) this_percpu_xchg_op(pcp, nval)
 #define this_cpu_cmpxchg_8(pcp, oval, nval) percpu_cmpxchg_op(8, volatile, pcp, oval, nval)
 #define this_cpu_try_cmpxchg_8(pcp, ovalp, nval) percpu_try_cmpxchg_op(8, volatile, pcp, ovalp, nval)
+
+#define raw_cpu_read_long(pcp) raw_cpu_read_8(pcp)
 #else
 /* There is no generic 64 bit read stable operation for 32 bit targets. */
-#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
+#define this_cpu_read_stable_8(pcp) ({ BUILD_BUG(); (typeof(pcp))0; })
+
+#define raw_cpu_read_long(pcp) raw_cpu_read_4(pcp)
 #endif
 
 #define x86_this_cpu_constant_test_bit(_nr, _var) \