From 010c4a461c1dbf3fa75ddea8df018a6128b700c6 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 28 Feb 2025 18:35:43 -0800 Subject: [PATCH 1/7] x86/speculation: Simplify and make CALL_NOSPEC consistent CALL_NOSPEC macro is used to generate Spectre-v2 mitigation friendly indirect branches. At compile time the macro defaults to indirect branch, and at runtime those can be patched to thunk based mitigations. This approach is opposite of what is done for the rest of the kernel, where the compile time default is to replace indirect calls with retpoline thunk calls. Make CALL_NOSPEC consistent with the rest of the kernel, default to retpoline thunk at compile time when CONFIG_MITIGATION_RETPOLINE is enabled. Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250228-call-nospec-v3-1-96599fed0f33@linux.intel.com --- arch/x86/include/asm/nospec-branch.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 7e8bf78c03d5..1e6b915ce956 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -424,16 +424,11 @@ static inline void call_depth_return_thunk(void) {} * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. 
*/ -# define CALL_NOSPEC \ - ALTERNATIVE_2( \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - "call __x86_indirect_thunk_%V[thunk_target]\n", \ - X86_FEATURE_RETPOLINE, \ - "lfence;\n" \ - ANNOTATE_RETPOLINE_SAFE \ - "call *%[thunk_target]\n", \ - X86_FEATURE_RETPOLINE_LFENCE) +#ifdef CONFIG_MITIGATION_RETPOLINE +#define CALL_NOSPEC "call __x86_indirect_thunk_%V[thunk_target]\n" +#else +#define CALL_NOSPEC "call *%[thunk_target]\n" +#endif # define THUNK_TARGET(addr) [thunk_target] "r" (addr) From 9af9ad85ac44cb754e526d468c3006b48db5dfd8 Mon Sep 17 00:00:00 2001 From: Pawan Gupta Date: Fri, 28 Feb 2025 18:35:58 -0800 Subject: [PATCH 2/7] x86/speculation: Add a conditional CS prefix to CALL_NOSPEC Retpoline mitigation for spectre-v2 uses thunks for indirect branches. To support this mitigation compilers add a CS prefix with -mindirect-branch-cs-prefix. For an indirect branch in asm, this needs to be added manually. CS prefix is already being added to indirect branches in asm files, but not in inline asm. Add CS prefix to CALL_NOSPEC for inline asm as well. There is no JMP_NOSPEC for inline asm. Reported-by: Josh Poimboeuf Signed-off-by: Pawan Gupta Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: Peter Zijlstra Link: https://lore.kernel.org/r/20250228-call-nospec-v3-2-96599fed0f33@linux.intel.com --- arch/x86/include/asm/nospec-branch.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 1e6b915ce956..aee26bb8230f 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -198,9 +198,8 @@ .endm /* - * Equivalent to -mindirect-branch-cs-prefix; emit the 5 byte jmp/call - * to the retpoline thunk with a CS prefix when the register requires - * a RAX prefix byte to encode. Also see apply_retpolines(). + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. 
*/ .macro __CS_PREFIX reg:req .irp rs,r8,r9,r10,r11,r12,r13,r14,r15 @@ -420,12 +419,24 @@ static inline void call_depth_return_thunk(void) {} #ifdef CONFIG_X86_64 +/* + * Emits a conditional CS prefix that is compatible with + * -mindirect-branch-cs-prefix. + */ +#define __CS_PREFIX(reg) \ + ".irp rs,r8,r9,r10,r11,r12,r13,r14,r15\n" \ + ".ifc \\rs," reg "\n" \ + ".byte 0x2e\n" \ + ".endif\n" \ + ".endr\n" + /* * Inline asm uses the %V modifier which is only in newer GCC * which is ensured when CONFIG_MITIGATION_RETPOLINE is defined. */ #ifdef CONFIG_MITIGATION_RETPOLINE -#define CALL_NOSPEC "call __x86_indirect_thunk_%V[thunk_target]\n" +#define CALL_NOSPEC __CS_PREFIX("%V[thunk_target]") \ + "call __x86_indirect_thunk_%V[thunk_target]\n" #else #define CALL_NOSPEC "call *%[thunk_target]\n" #endif From 8177c6bedb7013cf736137da586cf783922309dd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:12 +0100 Subject: [PATCH 3/7] x86/cacheinfo: Validate CPUID leaf 0x2 EDX output CPUID leaf 0x2 emits one-byte descriptors in its four output registers EAX, EBX, ECX, and EDX. For these descriptors to be valid, the most significant bit (MSB) of each register must be clear. The historical Git commit: 019361a20f016 ("- pre6: Intel: start to add Pentium IV specific stuff (128-byte cacheline etc)...") introduced leaf 0x2 output parsing. It only validated the MSBs of EAX, EBX, and ECX, but left EDX unchecked. Validate EDX's most-significant bit. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: stable@vger.kernel.org Cc: "H. 
Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250304085152.51092-2-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e6fa03ed9172..a6c6bccfa8b8 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -808,7 +808,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; From 1881148215c67151b146450fb89ec22fd92337a7 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:13 +0100 Subject: [PATCH 4/7] x86/cpu: Validate CPUID leaf 0x2 EDX output CPUID leaf 0x2 emits one-byte descriptors in its four output registers EAX, EBX, ECX, and EDX. For these descriptors to be valid, the most significant bit (MSB) of each register must be clear. Leaf 0x2 parsing at intel.c only validated the MSBs of EAX, EBX, and ECX, but left EDX unchecked. Validate EDX's most-significant bit as well. Fixes: e0ba94f14f74 ("x86/tlb_info: get last level TLB entry number of CPU") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: stable@kernel.org Cc: "H. 
Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250304085152.51092-3-darwi@linutronix.de --- arch/x86/kernel/cpu/intel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3dce22f00dc3..2a3716afee63 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -799,7 +799,7 @@ static void intel_detect_tlb(struct cpuinfo_x86 *c) cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 3 ; j++) + for (j = 0 ; j < 4 ; j++) if (regs[j] & (1 << 31)) regs[j] = 0; From f6bdaab79ee4228a143ee1b4cb80416d6ffc0c63 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Tue, 4 Mar 2025 09:51:14 +0100 Subject: [PATCH 5/7] x86/cpu: Properly parse CPUID leaf 0x2 TLB descriptor 0x63 CPUID leaf 0x2's one-byte TLB descriptors report the number of entries for specific TLB types, among other properties. Typically, each emitted descriptor implies the same number of entries for its respective TLB type(s). An emitted 0x63 descriptor is an exception: it implies 4 data TLB entries for 1GB pages and 32 data TLB entries for 2MB or 4MB pages. For the TLB descriptors parsing code, the entry count for 1GB pages is encoded at the intel_tlb_table[] mapping, but the 2MB/4MB entry count is totally ignored. Update leaf 0x2's parsing logic to account for 32 data TLB entries for 2MB/4MB pages implied by the 0x63 descriptor. Fixes: e0ba94f14f74 ("x86/tlb_info: get last level TLB entry number of CPU") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: stable@kernel.org Cc: "H. 
Peter Anvin" Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250304085152.51092-4-darwi@linutronix.de --- arch/x86/kernel/cpu/intel.c | 50 +++++++++++++++++++++++++------------ 1 file changed, 34 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 2a3716afee63..134368a3f4b1 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -635,26 +635,37 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -#define TLB_INST_4K 0x01 -#define TLB_INST_4M 0x02 -#define TLB_INST_2M_4M 0x03 +#define TLB_INST_4K 0x01 +#define TLB_INST_4M 0x02 +#define TLB_INST_2M_4M 0x03 -#define TLB_INST_ALL 0x05 -#define TLB_INST_1G 0x06 +#define TLB_INST_ALL 0x05 +#define TLB_INST_1G 0x06 -#define TLB_DATA_4K 0x11 -#define TLB_DATA_4M 0x12 -#define TLB_DATA_2M_4M 0x13 -#define TLB_DATA_4K_4M 0x14 +#define TLB_DATA_4K 0x11 +#define TLB_DATA_4M 0x12 +#define TLB_DATA_2M_4M 0x13 +#define TLB_DATA_4K_4M 0x14 -#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G 0x16 +#define TLB_DATA_1G_2M_4M 0x17 -#define TLB_DATA0_4K 0x21 -#define TLB_DATA0_4M 0x22 -#define TLB_DATA0_2M_4M 0x23 +#define TLB_DATA0_4K 0x21 +#define TLB_DATA0_4M 0x22 +#define TLB_DATA0_2M_4M 0x23 -#define STLB_4K 0x41 -#define STLB_4K_2M 0x42 +#define STLB_4K 0x41 +#define STLB_4K_2M 0x42 + +/* + * All of leaf 0x2's one-byte TLB descriptors imply the same number of + * entries for their respective TLB types. The 0x63 descriptor is an + * exception: it implies 4 dTLB entries for 1GB pages and 32 dTLB entries + * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for + * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the + * intel_tlb_table[] mapping. 
+ */ +#define TLB_0x63_2M_4M_ENTRIES 32 static const struct _tlb_table intel_tlb_table[] = { { 0x01, TLB_INST_4K, 32, " TLB_INST 4 KByte pages, 4-way set associative" }, @@ -676,7 +687,8 @@ static const struct _tlb_table intel_tlb_table[] = { { 0x5c, TLB_DATA_4K_4M, 128, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x5d, TLB_DATA_4K_4M, 256, " TLB_DATA 4 KByte and 4 MByte pages" }, { 0x61, TLB_INST_4K, 48, " TLB_INST 4 KByte pages, full associative" }, - { 0x63, TLB_DATA_1G, 4, " TLB_DATA 1 GByte pages, 4-way set associative" }, + { 0x63, TLB_DATA_1G_2M_4M, 4, " TLB_DATA 1 GByte pages, 4-way set associative" + " (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here)" }, { 0x6b, TLB_DATA_4K, 256, " TLB_DATA 4 KByte pages, 8-way associative" }, { 0x6c, TLB_DATA_2M_4M, 128, " TLB_DATA 2 MByte or 4 MByte pages, 8-way associative" }, { 0x6d, TLB_DATA_1G, 16, " TLB_DATA 1 GByte pages, fully associative" }, @@ -776,6 +788,12 @@ static void intel_tlb_lookup(const unsigned char desc) if (tlb_lld_4m[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_4m[ENTRIES] = intel_tlb_table[k].entries; break; + case TLB_DATA_1G_2M_4M: + if (tlb_lld_2m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_2m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + if (tlb_lld_4m[ENTRIES] < TLB_0x63_2M_4M_ENTRIES) + tlb_lld_4m[ENTRIES] = TLB_0x63_2M_4M_ENTRIES; + fallthrough; case TLB_DATA_1G: if (tlb_lld_1g[ENTRIES] < intel_tlb_table[k].entries) tlb_lld_1g[ENTRIES] = intel_tlb_table[k].entries; From 0d3e0dfd68fb9e6b0ec865be9f3377cc3ff55733 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 5 Mar 2025 07:00:05 +0200 Subject: [PATCH 6/7] x86/sgx: Fix size overflows in sgx_encl_create() The total size calculated for EPC can overflow u64 given the added up page for SECS. Further, the total size calculated for shmem can overflow even when the EPC size stays within limits of u64, given that it adds the extra space for 128 byte PCMD structures (one for each page). 
Address this by pre-evaluating the micro-architectural requirement of SGX: the address space size must be a power of two. This is eventually checked by ECREATE but the pre-check has the additional benefit of making sure that there is some space for additional data. Fixes: 888d24911787 ("x86/sgx: Add SGX_IOC_ENCLAVE_CREATE") Reported-by: Dan Carpenter Signed-off-by: Jarkko Sakkinen Signed-off-by: Ingo Molnar Acked-by: Dave Hansen Cc: Peter Zijlstra Cc: "H. Peter Anvin" Link: https://lore.kernel.org/r/20250305050006.43896-1-jarkko@kernel.org Closes: https://lore.kernel.org/linux-sgx/c87e01a0-e7dd-4749-a348-0980d3444f04@stanley.mountain/ --- arch/x86/kernel/cpu/sgx/ioctl.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kernel/cpu/sgx/ioctl.c b/arch/x86/kernel/cpu/sgx/ioctl.c index b65ab214bdf5..776a20172867 100644 --- a/arch/x86/kernel/cpu/sgx/ioctl.c +++ b/arch/x86/kernel/cpu/sgx/ioctl.c @@ -64,6 +64,13 @@ static int sgx_encl_create(struct sgx_encl *encl, struct sgx_secs *secs) struct file *backing; long ret; + /* + * ECREATE would detect this too, but checking here also ensures + * that the 'encl_size' calculations below can never overflow. + */ + if (!is_power_of_2(secs->size)) + return -EINVAL; + va_page = sgx_encl_grow(encl, true); if (IS_ERR(va_page)) return PTR_ERR(va_page); From c00b413a96261faef4ce22329153c6abd4acef25 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 6 Mar 2025 16:59:16 +0100 Subject: [PATCH 7/7] x86/boot: Sanitize boot params before parsing command line The 5-level paging code parses the command line to look for the 'no5lvl' string, and does so very early, before sanitize_boot_params() has been called and has been given the opportunity to wipe bogus data from the fields in boot_params that are not covered by struct setup_header, and are therefore supposed to be initialized to zero by the bootloader. 
This triggers an early boot crash when using syslinux-efi to boot a recent kernel built with CONFIG_X86_5LEVEL=y and CONFIG_EFI_STUB=n, as the 0xff padding that now fills the unused PE/COFF header is copied into boot_params by the bootloader, and interpreted as the top half of the command line pointer. Fix this by sanitizing the boot_params before use. Note that there is no harm in calling this more than once; subsequent invocations are able to spot that the boot_params have already been cleaned up. Signed-off-by: Ard Biesheuvel Signed-off-by: Ingo Molnar Cc: "H. Peter Anvin" Cc: Linus Torvalds Cc: <stable@vger.kernel.org> # v6.1+ Link: https://lore.kernel.org/r/20250306155915.342465-2-ardb+git@google.com Closes: https://lore.kernel.org/all/202503041549.35913.ulrich.gemkow@ikr.uni-stuttgart.de --- arch/x86/boot/compressed/pgtable_64.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index c882e1f67af0..d8c5de40669d 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include "misc.h" #include <asm/bootparam.h> +#include <asm/bootparam_utils.h> #include <asm/e820/types.h> #include <asm/processor.h> #include "pgtable.h" @@ -107,6 +108,7 @@ asmlinkage void configure_5level_paging(struct boot_params *bp, void *pgtable) bool l5_required = false; /* Initialize boot_params. Required for cmdline_find_option_bool(). */ + sanitize_boot_params(bp); boot_params_ptr = bp; /*