From 116edfe173d0c59ec2aa87fb91f2f31d477b61b3 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:22 +0100 Subject: [PATCH 001/101] tools/x86/kcpuid: Fix error handling Error handling in kcpuid is unreliable. On malloc() failures, the code prints an error then just goes on. The error messages are also printed to standard output instead of standard error. Use err() and errx() from to direct all error messages to standard error and automatically exit the program. Use err() to include the errno information, and errx() otherwise. Use warnx() for warnings. While at it, alphabetically reorder the header includes. [ mingo: Fix capitalization in the help text while at it. ] Fixes: c6b2f240bf8d ("tools/x86: Add a kcpuid tool to show raw CPU features") Reported-by: Remington Brasga Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-2-darwi@linutronix.de Closes: https://lkml.kernel.org/r/20240926223557.2048-1-rbrasga@uci.edu --- tools/arch/x86/kcpuid/kcpuid.c | 47 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 1b25c0a95d3f..40a9e59c2fd5 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -1,11 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE -#include +#include +#include #include +#include #include #include -#include #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define min(a, b) (((a) < (b)) ? (a) : (b)) @@ -145,14 +146,14 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, if (!func->leafs) { func->leafs = malloc(sizeof(struct subleaf)); if (!func->leafs) - perror("malloc func leaf"); + err(EXIT_FAILURE, NULL); func->nr = 1; } else { s = func->nr; func->leafs = realloc(func->leafs, (s + 1) * sizeof(*leaf)); if (!func->leafs) - perror("realloc f->leafs"); + err(EXIT_FAILURE, NULL); func->nr++; } @@ -211,7 +212,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range = malloc(sizeof(struct cpuid_range)); if (!range) - perror("malloc range"); + err(EXIT_FAILURE, NULL); if (input_eax & 0x80000000) range->is_ext = true; @@ -220,7 +221,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); if (!range->funcs) - perror("malloc range->funcs"); + err(EXIT_FAILURE, NULL); range->nr = idx_func; memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); @@ -395,8 +396,8 @@ static int parse_line(char *line) return 0; err_exit: - printf("Warning: wrong line format:\n"); - printf("\tline[%d]: %s\n", flines, line); + warnx("Wrong line format:\n" + "\tline[%d]: %s", flines, line); return -1; } @@ -418,10 +419,8 @@ static void parse_text(void) file = fopen("./cpuid.csv", "r"); } - if (!file) { - printf("Fail to open '%s'\n", filename); - return; - } + if (!file) + err(EXIT_FAILURE, "%s", filename); while (1) { ret = getline(&line, &len, file); @@ -530,7 +529,7 @@ static inline struct cpuid_func *index_to_func(u32 index) func_idx = index & 0xffff; if ((func_idx + 1) > (u32)range->nr) { - printf("ERR: invalid input index (0x%x)\n", index); + warnx("Invalid input index (0x%x)", index); return NULL; } return &range->funcs[func_idx]; @@ -562,7 +561,7 @@ static void show_info(void) return; } - printf("ERR: invalid input subleaf (0x%x)\n", user_sub); + warnx("Invalid input subleaf (0x%x)", user_sub); } show_func(func); @@ -593,15 +592,15 @@ static void setup_platform_cpuid(void) static void usage(void) { - printf("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" - "\t-a|--all Show both bit flags and complex bit fields info\n" - "\t-b|--bitflags Show boolean flags only\n" - "\t-d|--detail Show details of the flag/fields (default)\n" - "\t-f|--flags Specify the cpuid csv file\n" - "\t-h|--help Show usage info\n" - "\t-l|--leaf=index Specify the leaf you want to check\n" - "\t-r|--raw Show raw cpuid data\n" - "\t-s|--subleaf=sub Specify the subleaf you want to check\n" + warnx("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the CPUID CSV file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw CPUID data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check" ); } @@ -652,7 +651,7 @@ static int parse_options(int argc, char *argv[]) user_sub = strtoul(optarg, NULL, 0); break; default: - printf("%s: Invalid option '%c'\n", argv[0], optopt); + warnx("Invalid option '%c'", optopt); return -1; } From a866a6775793a2554585073704b83c4691d80740 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:23 +0100 Subject: [PATCH 002/101] tools/x86/kcpuid: Exit the program on invalid parameters If the user passed an invalid CPUID index value through --leaf=index, kcpuid prints a warning, does nothing, then exits successfully. Transform the warning to an error, and exit the program with a proper error code. Similarly, if the user passed an invalid subleaf, kcpuid prints a warning, dumps the whole leaf, then exits successfully. Print a clear error message regarding the invalid subleaf and exit the program with the proper error code. Note, moving the "Invalid input index" message from index_to_func() to show_info() localizes error message handling to the latter, where it should be. It also allows index_to_func() to be refactored at further commits. Note, since after this commit and its parent kcpuid does not just "move on" on failures, remove the NULL parameter check plus silent exit at show_func() and show_leaf(). Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-3-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 22 ++++++++-------------- 1 file changed, 8 insertions(+), 14 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 40a9e59c2fd5..25b10feeb720 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -481,9 +481,6 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) static void show_leaf(struct subleaf *leaf) { - if (!leaf) - return; - if (show_raw) { leaf_print_raw(leaf); } else { @@ -505,9 +502,6 @@ static void show_func(struct cpuid_func *func) { int i; - if (!func) - return; - for (i = 0; i < func->nr; i++) show_leaf(&func->leafs[i]); } @@ -528,10 +522,9 @@ static inline struct cpuid_func *index_to_func(u32 index) range = (index & 0x80000000) ? leafs_ext : leafs_basic; func_idx = index & 0xffff; - if ((func_idx + 1) > (u32)range->nr) { - warnx("Invalid input index (0x%x)", index); + if ((func_idx + 1) > (u32)range->nr) return NULL; - } + return &range->funcs[func_idx]; } @@ -550,18 +543,19 @@ static void show_info(void) /* Only show specific leaf/subleaf info */ func = index_to_func(user_index); if (!func) - return; + errx(EXIT_FAILURE, "Invalid input leaf (0x%x)", user_index); /* Dump the raw data also */ show_raw = true; if (user_sub != 0xFFFFFFFF) { - if (user_sub + 1 <= (u32)func->nr) { - show_leaf(&func->leafs[user_sub]); - return; + if (user_sub + 1 > (u32)func->nr) { + errx(EXIT_FAILURE, "Leaf 0x%x has no valid subleaf = 0x%x", + user_index, user_sub); } - warnx("Invalid input subleaf (0x%x)", user_sub); + show_leaf(&func->leafs[user_sub]); + return; } show_func(func); From 660c29fe53deeb3b3aef1d666ed3bde7608380bd Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:24 +0100 Subject: [PATCH 003/101] tools/x86/kcpuid: Simplify usage() handling Refactor usage() to accept an exit code parameter and exit the program after usage output. This streamlines its callers' code paths. Remove the "Invalid option" error message since getopt_long(3) already emits a similar message by default. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-4-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 37 +++++++++++++++------------------- 1 file changed, 16 insertions(+), 21 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 25b10feeb720..a90ac0b898a1 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -10,6 +10,7 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) #define min(a, b) (((a) < (b)) ? (a) : (b)) +#define __noreturn __attribute__((__noreturn__)) typedef unsigned int u32; typedef unsigned long long u64; @@ -584,17 +585,17 @@ static void setup_platform_cpuid(void) leafs_ext = setup_cpuid_range(0x80000000); } -static void usage(void) +static void __noreturn usage(int exit_code) { - warnx("kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" - "\t-a|--all Show both bit flags and complex bit fields info\n" - "\t-b|--bitflags Show boolean flags only\n" - "\t-d|--detail Show details of the flag/fields (default)\n" - "\t-f|--flags Specify the CPUID CSV file\n" - "\t-h|--help Show usage info\n" - "\t-l|--leaf=index Specify the leaf you want to check\n" - "\t-r|--raw Show raw CPUID data\n" - "\t-s|--subleaf=sub Specify the subleaf you want to check" + errx(exit_code, "kcpuid [-abdfhr] [-l leaf] [-s subleaf]\n" + "\t-a|--all Show both bit flags and complex bit fields info\n" + "\t-b|--bitflags Show boolean flags only\n" + "\t-d|--detail Show details of the flag/fields (default)\n" + "\t-f|--flags Specify the CPUID CSV file\n" + "\t-h|--help Show usage info\n" + "\t-l|--leaf=index Specify the leaf you want to check\n" + "\t-r|--raw Show raw CPUID data\n" + "\t-s|--subleaf=sub Specify the subleaf you want to check" ); } @@ -610,7 +611,7 @@ static struct option opts[] = { { NULL, 0, NULL, 0 } }; -static int parse_options(int argc, char *argv[]) +static void parse_options(int argc, char *argv[]) { int c; @@ -630,9 +631,7 @@ static int parse_options(int argc, char *argv[]) user_csv = optarg; break; case 'h': - usage(); - exit(1); - break; + usage(EXIT_SUCCESS); case 'l': /* main leaf */ user_index = strtoul(optarg, NULL, 0); @@ -645,11 +644,8 @@ static int parse_options(int argc, char *argv[]) user_sub = strtoul(optarg, NULL, 0); break; default: - warnx("Invalid option '%c'", optopt); - return -1; - } - - return 0; + usage(EXIT_FAILURE); + } } /* @@ -662,8 +658,7 @@ static int parse_options(int argc, char *argv[]) */ int main(int argc, char *argv[]) { - if (parse_options(argc, argv)) - return -1; + parse_options(argc, argv); /* Setup the cpuid leafs of current platform */ setup_platform_cpuid(); From 6bef74cab03ada39f16f1fb86b732f7c71a9f07a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:25 +0100 Subject: [PATCH 004/101] tools/x86/kcpuid: Save CPUID output in an array For each CPUID leaf/subleaf query, save the output in an output[] array instead of spelling it out using EAX to EDX variables. This allows the CPUID output to be accessed programmatically instead of calling decode_bits() four times. Loop-based access also allows "kcpuid --detail" to print the correct output register names in next commit. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-5-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index a90ac0b898a1..8da0e072055b 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -51,7 +51,7 @@ static const char * const reg_names[] = { struct subleaf { u32 index; u32 sub; - u32 eax, ebx, ecx, edx; + u32 output[NR_REGS]; struct reg_desc info[NR_REGS]; }; @@ -119,11 +119,11 @@ static void leaf_print_raw(struct subleaf *leaf) if (leaf->sub == 0) printf("0x%08x: subleafs:\n", leaf->index); - printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", - leaf->sub, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + printf(" %2d: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->sub, + leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]); } else { - printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", - leaf->index, leaf->eax, leaf->ebx, leaf->ecx, leaf->edx); + printf("0x%08x: EAX=0x%08x, EBX=0x%08x, ECX=0x%08x, EDX=0x%08x\n", leaf->index, + leaf->output[0], leaf->output[1], leaf->output[2], leaf->output[3]); } } @@ -163,10 +163,10 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, leaf->index = f; leaf->sub = subleaf; - leaf->eax = a; - leaf->ebx = b; - leaf->ecx = c; - leaf->edx = d; + leaf->output[R_EAX] = a; + leaf->output[R_EBX] = b; + leaf->output[R_ECX] = c; + leaf->output[R_EDX] = d; return false; } @@ -490,10 +490,8 @@ static void show_leaf(struct subleaf *leaf) leaf->index, leaf->sub); } - decode_bits(leaf->eax, &leaf->info[R_EAX], R_EAX); - decode_bits(leaf->ebx, &leaf->info[R_EBX], R_EBX); - decode_bits(leaf->ecx, &leaf->info[R_ECX], R_ECX); - decode_bits(leaf->edx, &leaf->info[R_EDX], R_EDX); + for (int i = R_EAX; i < NR_REGS; i++) + decode_bits(leaf->output[i], &leaf->info[i], i); if (!show_raw && show_details) printf("\n"); From ce61b6067d8cbfcd3607d942913b21c7ce2b384b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:26 +0100 Subject: [PATCH 005/101] tools/x86/kcpuid: Print correct CPUID output register names kcpuid --all --detail claims that all bits belong to ECX, in the form of the header CPUID_${leaf}_ECX[${subleaf}]. Print the correct register name for all CPUID output. kcpuid --detail also dumps the raw register value if a leaf/subleaf is covered in the CSV file, but a certain output register within it is not covered by any CSV entry. Since register names are now properly printed, and since the CSV file has become exhaustive using x86-cpuid-db, remove that value dump as it pollutes the output. While at it, rename decode_bits() to show_reg(). This makes it match its show_range(), show_leaf() and show_reg_header() counterparts. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-6-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 8da0e072055b..aa0e03798326 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -436,20 +436,12 @@ static void parse_text(void) fclose(file); } - -/* Decode every eax/ebx/ecx/edx */ -static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) +static void show_reg(const struct reg_desc *rdesc, u32 value) { - struct bits_desc *bdesc; + const struct bits_desc *bdesc; int start, end, i; u32 mask; - if (!rdesc->nr) { - if (show_details) - printf("\t %s: 0x%08x\n", reg_names[reg], value); - return; - } - for (i = 0; i < rdesc->nr; i++) { bdesc = &rdesc->descs[i]; @@ -480,18 +472,21 @@ static void decode_bits(u32 value, struct reg_desc *rdesc, enum cpuid_reg reg) } } +static void show_reg_header(bool has_entries, u32 leaf, u32 subleaf, const char *reg_name) +{ + if (show_details && has_entries) + printf("CPUID_0x%x_%s[0x%x]:\n", leaf, reg_name, subleaf); +} + static void show_leaf(struct subleaf *leaf) { - if (show_raw) { + if (show_raw) leaf_print_raw(leaf); - } else { - if (show_details) - printf("CPUID_0x%x_ECX[0x%x]:\n", - leaf->index, leaf->sub); - } - for (int i = R_EAX; i < NR_REGS; i++) - decode_bits(leaf->output[i], &leaf->info[i], i); + for (int i = R_EAX; i < NR_REGS; i++) { + show_reg_header((leaf->info[i].nr > 0), leaf->index, leaf->sub, reg_names[i]); + show_reg(&leaf->info[i], leaf->output[i]); + } if (!show_raw && show_details) printf("\n"); From c061ded035b5bdeb21adc1046c0bfbba4bcee2d2 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:27 +0100 Subject: [PATCH 006/101] tools/x86/kcpuid: Remove unused local variable The local variable "index" is written to, but is not read from anywhere. Remove it. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-7-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index aa0e03798326..f1dac5bee515 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -181,10 +181,6 @@ static void raw_dump_range(struct cpuid_range *range) for (f = 0; (int)f < range->nr; f++) { struct cpuid_func *func = &range->funcs[f]; - u32 index = f; - - if (range->is_ext) - index += 0x80000000; /* Skip leaf without valid items */ if (!func->nr) From 2b383ca0896f0ffc24fd4d93440320d2dd5daad1 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:28 +0100 Subject: [PATCH 007/101] tools/x86/kcpuid: Remove unused global variable The global variable "is_amd" is written to, but is not read from anywhere. Remove it. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-8-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index f1dac5bee515..f268c768d023 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -79,7 +79,6 @@ struct cpuid_range { */ struct cpuid_range *leafs_basic, *leafs_ext; -static bool is_amd; static bool show_details; static bool show_raw; static bool show_flags_only = true; @@ -559,16 +558,6 @@ static void show_info(void) static void setup_platform_cpuid(void) { - u32 eax, ebx, ecx, edx; - - /* Check vendor */ - eax = ebx = ecx = edx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - - /* "htuA" */ - if (ebx == 0x68747541) - is_amd = true; - /* Setup leafs for the basic and extended range */ leafs_basic = setup_cpuid_range(0x0); leafs_ext = setup_cpuid_range(0x80000000); From 8984cea5c4743d7fabf5de9da142cfc5dde1a144 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:29 +0100 Subject: [PATCH 008/101] tools/x86/kcpuid: Set parse_line() return type to void parse_line() returns an integer but its caller ignored it. Change the function signature to return void. While at it, adjust some of the "Skip line" comments for readability. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-9-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index f268c768d023..4fa768bb49e1 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -277,7 +277,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) * 0, 0, EAX, 31:0, max_basic_leafs, Max input value for supported subleafs * 1, 0, ECX, 0, sse3, Streaming SIMD Extensions 3(SSE3) */ -static int parse_line(char *line) +static void parse_line(char *line) { char *str; int i; @@ -307,7 +307,7 @@ static int parse_line(char *line) /* Skip comments and NULL line */ if (line[0] == '#' || line[0] == '\n') - return 0; + return; strncpy(buffer, line, 511); buffer[511] = 0; @@ -330,16 +330,15 @@ static int parse_line(char *line) else range = leafs_basic; - index &= 0x7FFFFFFF; /* Skip line parsing for non-existing indexes */ + index &= 0x7FFFFFFF; if ((int)index >= range->nr) - return -1; + return; + /* Skip line parsing if the index CPUID output is all zero */ func = &range->funcs[index]; - - /* Return if the index has no valid item on this platform */ if (!func->nr) - return 0; + return; /* subleaf */ buf = tokens[1]; @@ -352,11 +351,11 @@ static int parse_line(char *line) subleaf_start = strtoul(start, NULL, 0); subleaf_end = min(subleaf_end, (u32)(func->nr - 1)); if (subleaf_start > subleaf_end) - return 0; + return; } else { subleaf_start = subleaf_end; if (subleaf_start > (u32)(func->nr - 1)) - return 0; + return; } /* register */ @@ -389,12 +388,11 @@ static int parse_line(char *line) strcpy(bdesc->simp, strtok(tokens[4], " \t")); strcpy(bdesc->detail, tokens[5]); } - return 0; + return; err_exit: warnx("Wrong line format:\n" "\tline[%d]: %s", flines, line); - return -1; } /* Parse csv file, and construct the array of all leafs and subleafs */ From 0a8f12ccd2e6edac89292af63c3a2050b4aac61b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:30 +0100 Subject: [PATCH 009/101] tools/x86/kcpuid: Use C99-style for loops Since commit e8c07082a810 ("Kbuild: move to -std=gnu11") and the kernel allows C99-style variable declarations inside of a for() loop. Adjust the kcpuid code accordingly. Note, this helps readability as some of the kcpuid functions have a huge list of variable declarations on top. Note, remove the empty lines before cpuid() invocations as it is clearer to have their parameter initialization and the actual call in one block. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-10-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 52 ++++++++++++++-------------------- 1 file changed, 21 insertions(+), 31 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 4fa768bb49e1..413e5dad9e46 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -172,13 +172,10 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, static void raw_dump_range(struct cpuid_range *range) { - u32 f; - int i; - printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic"); printf("================\n"); - for (f = 0; (int)f < range->nr; f++) { + for (u32 f = 0; (int)f < range->nr; f++) { struct cpuid_func *func = &range->funcs[f]; /* Skip leaf without valid items */ @@ -186,7 +183,7 @@ static void raw_dump_range(struct cpuid_range *range) continue; /* First item is the main leaf, followed by all subleafs */ - for (i = 0; i < func->nr; i++) + for (int i = 0; i < func->nr; i++) leaf_print_raw(&func->leafs[i]); } } @@ -194,15 +191,14 @@ static void raw_dump_range(struct cpuid_range *range) #define MAX_SUBLEAF_NUM 64 struct cpuid_range *setup_cpuid_range(u32 input_eax) { - u32 max_func, idx_func, subleaf, max_subleaf; - u32 eax, ebx, ecx, edx, f = input_eax; struct cpuid_range *range; - bool allzero; + u32 max_func, idx_func; + u32 eax, ebx, ecx, edx; eax = input_eax; ebx = ecx = edx = 0; - cpuid(&eax, &ebx, &ecx, &edx); + max_func = eax; idx_func = (max_func & 0xffff) + 1; @@ -222,20 +218,21 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range->nr = idx_func; memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); - for (; f <= max_func; f++) { - eax = f; - subleaf = ecx = 0; + for (u32 f = input_eax; f <= max_func; f++) { + u32 max_subleaf = MAX_SUBLEAF_NUM; + bool allzero; + eax = f; + ecx = 0; cpuid(&eax, &ebx, &ecx, &edx); - allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); + + allzero = cpuid_store(range, f, 0, eax, ebx, ecx, edx); if (allzero) continue; if (!has_subleafs(f)) continue; - max_subleaf = MAX_SUBLEAF_NUM; - /* * Some can provide the exact number of subleafs, * others have to be tried (0xf) @@ -253,13 +250,12 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) if (f == 0x80000026) max_subleaf = 5; - for (subleaf = 1; subleaf < max_subleaf; subleaf++) { + for (u32 subleaf = 1; subleaf < max_subleaf; subleaf++) { eax = f; ecx = subleaf; - cpuid(&eax, &ebx, &ecx, &edx); - allzero = cpuid_store(range, f, subleaf, - eax, ebx, ecx, edx); + + allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); if (allzero) continue; } @@ -280,12 +276,10 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) static void parse_line(char *line) { char *str; - int i; struct cpuid_range *range; struct cpuid_func *func; struct subleaf *leaf; u32 index; - u32 sub; char buffer[512]; char *buf; /* @@ -312,7 +306,7 @@ static void parse_line(char *line) strncpy(buffer, line, 511); buffer[511] = 0; str = buffer; - for (i = 0; i < 5; i++) { + for (int i = 0; i < 5; i++) { tokens[i] = strtok(str, ","); if (!tokens[i]) goto err_exit; @@ -378,7 +372,7 @@ static void parse_line(char *line) bit_end = strtoul(end, NULL, 0); bit_start = (start) ? strtoul(start, NULL, 0) : bit_end; - for (sub = subleaf_start; sub <= subleaf_end; sub++) { + for (u32 sub = subleaf_start; sub <= subleaf_end; sub++) { leaf = &func->leafs[sub]; reg = &leaf->info[reg_index]; bdesc = ®->descs[reg->nr++]; @@ -432,10 +426,10 @@ static void parse_text(void) static void show_reg(const struct reg_desc *rdesc, u32 value) { const struct bits_desc *bdesc; - int start, end, i; + int start, end; u32 mask; - for (i = 0; i < rdesc->nr; i++) { + for (int i = 0; i < rdesc->nr; i++) { bdesc = &rdesc->descs[i]; start = bdesc->start; @@ -487,17 +481,13 @@ static void show_leaf(struct subleaf *leaf) static void show_func(struct cpuid_func *func) { - int i; - - for (i = 0; i < func->nr; i++) + for (int i = 0; i < func->nr; i++) show_leaf(&func->leafs[i]); } static void show_range(struct cpuid_range *range) { - int i; - - for (i = 0; i < range->nr; i++) + for (int i = 0; i < range->nr; i++) show_func(&range->funcs[i]); } From c479a84488d10b3d4259186d80839f99e26b1706 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:31 +0100 Subject: [PATCH 010/101] tools/x86/kcpuid: Use intrinsics Use the __cpuid_count() intrinsic, provided by GCC and LLVM, instead of rolling a manual version. Both of the kernel's minimum required GCC version (5.1) and LLVM version (13.0.1) supports it, and it is heavily used across standard Linux user-space tooling. This also makes the CPUID call sites more readable. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-11-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 37 ++++++++++++++-------------------- 1 file changed, 15 insertions(+), 22 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 413e5dad9e46..25388a5d40ac 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #define _GNU_SOURCE +#include #include #include #include @@ -86,16 +87,16 @@ static u32 user_index = 0xFFFFFFFF; static u32 user_sub = 0xFFFFFFFF; static int flines; -static inline void cpuid(u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) -{ - /* ecx is often an input as well as an output. */ - asm volatile("cpuid" - : "=a" (*eax), - "=b" (*ebx), - "=c" (*ecx), - "=d" (*edx) - : "0" (*eax), "2" (*ecx)); -} +/* + * Force using __cpuid_count() instead of __cpuid(). The + * latter leaves ECX uninitialized, which can break CPUID queries. + */ + +#define cpuid(leaf, a, b, c, d) \ + __cpuid_count(leaf, 0, a, b, c, d) + +#define cpuid_count(leaf, subleaf, a, b, c, d) \ + __cpuid_count(leaf, subleaf, a, b, c, d) static inline bool has_subleafs(u32 f) { @@ -195,12 +196,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) u32 max_func, idx_func; u32 eax, ebx, ecx, edx; - eax = input_eax; - ebx = ecx = edx = 0; - cpuid(&eax, &ebx, &ecx, &edx); - - max_func = eax; - idx_func = (max_func & 0xffff) + 1; + cpuid(input_eax, max_func, ebx, ecx, edx); range = malloc(sizeof(struct cpuid_range)); if (!range) @@ -211,6 +207,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) else range->is_ext = false; + idx_func = (max_func & 0xffff) + 1; range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); if (!range->funcs) err(EXIT_FAILURE, NULL); @@ -222,9 +219,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) u32 max_subleaf = MAX_SUBLEAF_NUM; bool allzero; - eax = f; - ecx = 0; - cpuid(&eax, &ebx, &ecx, &edx); + cpuid(f, eax, ebx, ecx, edx); allzero = cpuid_store(range, f, 0, eax, ebx, ecx, edx); if (allzero) @@ -251,9 +246,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) max_subleaf = 5; for (u32 subleaf = 1; subleaf < max_subleaf; subleaf++) { - eax = f; - ecx = subleaf; - cpuid(&eax, &ebx, &ecx, &edx); + cpuid_count(f, subleaf, eax, ebx, ecx, edx); allzero = cpuid_store(range, f, subleaf, eax, ebx, ecx, edx); if (allzero) From 3151ec059dd1e71761f3beccc1e5f5c18fac4afa Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:32 +0100 Subject: [PATCH 011/101] tools/x86/kcpuid: Refactor CPUID range handling for future expansion The kcpuid code assumes only two CPUID index ranges, standard (0x0...) and extended (0x80000000...). Since additional CPUID index ranges will be added in further commits, replace the "is_ext" boolean with enumeration-based range classification. Collect all CPUID ranges in a structured array and introduce helper macros to iterate over it. Use such helpers throughout the code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-12-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 100 +++++++++++++++++++-------------- 1 file changed, 59 insertions(+), 41 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 25388a5d40ac..7790136f04dc 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -66,19 +66,50 @@ struct cpuid_func { int nr; }; +enum range_index { + RANGE_STD = 0, /* Standard */ + RANGE_EXT = 0x80000000, /* Extended */ +}; + +#define CPUID_INDEX_MASK 0x80000000 +#define CPUID_FUNCTION_MASK (~CPUID_INDEX_MASK) + struct cpuid_range { /* array of main leafs */ struct cpuid_func *funcs; /* number of valid leafs */ int nr; - bool is_ext; + enum range_index index; }; -/* - * basic: basic functions range: [0... ] - * ext: extended functions range: [0x80000000... ] - */ -struct cpuid_range *leafs_basic, *leafs_ext; +static struct cpuid_range ranges[] = { + { .index = RANGE_STD, }, + { .index = RANGE_EXT, }, +}; + +static char *range_to_str(struct cpuid_range *range) +{ + switch (range->index) { + case RANGE_STD: return "Standard"; + case RANGE_EXT: return "Extended"; + default: return NULL; + } +} + +#define for_each_cpuid_range(range) \ + for (unsigned int i = 0; i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]); i++) + +struct cpuid_range *index_to_cpuid_range(u32 index) +{ + struct cpuid_range *range; + + for_each_cpuid_range(range) { + if (range->index == (index & CPUID_INDEX_MASK)) + return range; + } + + return NULL; +} static bool show_details; static bool show_raw; @@ -173,7 +204,7 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, static void raw_dump_range(struct cpuid_range *range) { - printf("%s Leafs :\n", range->is_ext ? "Extended" : "Basic"); + printf("%s Leafs :\n", range_to_str(range)); printf("================\n"); for (u32 f = 0; (int)f < range->nr; f++) { @@ -190,22 +221,12 @@ static void raw_dump_range(struct cpuid_range *range) } #define MAX_SUBLEAF_NUM 64 -struct cpuid_range *setup_cpuid_range(u32 input_eax) +void setup_cpuid_range(struct cpuid_range *range) { - struct cpuid_range *range; u32 max_func, idx_func; u32 eax, ebx, ecx, edx; - cpuid(input_eax, max_func, ebx, ecx, edx); - - range = malloc(sizeof(struct cpuid_range)); - if (!range) - err(EXIT_FAILURE, NULL); - - if (input_eax & 0x80000000) - range->is_ext = true; - else - range->is_ext = false; + cpuid(range->index, max_func, ebx, ecx, edx); idx_func = (max_func & 0xffff) + 1; range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); @@ -215,7 +236,7 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) range->nr = idx_func; memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); - for (u32 f = input_eax; f <= max_func; f++) { + for (u32 f = range->index; f <= max_func; f++) { u32 max_subleaf = MAX_SUBLEAF_NUM; bool allzero; @@ -254,8 +275,6 @@ struct cpuid_range *setup_cpuid_range(u32 input_eax) } } - - return range; } /* @@ -312,13 +331,13 @@ static void parse_line(char *line) /* index/main-leaf */ index = strtoull(tokens[0], NULL, 0); - if (index & 0x80000000) - range = leafs_ext; - else - range = leafs_basic; + /* Skip line parsing if it's not covered by known ranges */ + range = index_to_cpuid_range(index); + if (!range) + return; /* Skip line parsing for non-existing indexes */ - index &= 0x7FFFFFFF; + index &= CPUID_FUNCTION_MASK; if ((int)index >= range->nr) return; @@ -489,9 +508,11 @@ static inline struct cpuid_func *index_to_func(u32 index) struct cpuid_range *range; u32 func_idx; - range = (index & 0x80000000) ? leafs_ext : leafs_basic; - func_idx = index & 0xffff; + range = index_to_cpuid_range(index); + if (!range) + return NULL; + func_idx = index & 0xffff; if ((func_idx + 1) > (u32)range->nr) return NULL; @@ -500,12 +521,13 @@ static inline struct cpuid_func *index_to_func(u32 index) static void show_info(void) { + struct cpuid_range *range; struct cpuid_func *func; if (show_raw) { /* Show all of the raw output of 'cpuid' instr */ - raw_dump_range(leafs_basic); - raw_dump_range(leafs_ext); + for_each_cpuid_range(range) + raw_dump_range(range); return; } @@ -533,15 +555,8 @@ static void show_info(void) } printf("CPU features:\n=============\n\n"); - show_range(leafs_basic); - show_range(leafs_ext); -} - -static void setup_platform_cpuid(void) -{ - /* Setup leafs for the basic and extended range */ - leafs_basic = setup_cpuid_range(0x0); - leafs_ext = setup_cpuid_range(0x80000000); + for_each_cpuid_range(range) + show_range(range); } static void __noreturn usage(int exit_code) @@ -617,10 +632,13 @@ static void parse_options(int argc, char *argv[]) */ int main(int argc, char *argv[]) { + struct cpuid_range *range; + parse_options(argc, argv); /* Setup the cpuid leafs of current platform */ - setup_platform_cpuid(); + for_each_cpuid_range(range) + setup_cpuid_range(range); /* Read and parse the 'cpuid.csv' */ parse_text(); From f2e2efe9489d883fdaac8b7b46bd669b6214b1cb Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:33 +0100 Subject: [PATCH 012/101] tools/x86/kcpuid: Extend CPUID index mask macro Extend the CPUID index mask macro from 0x80000000 to 0xffff0000. This accommodates the Transmeta (0x80860000) and Centaur (0xc0000000) index ranges which will be later added. This also automatically sets CPUID_FUNCTION_MASK to 0x0000ffff, which is the actual correct value. Use that macro, instead of the 0xffff literal where appropriate. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-13-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 7790136f04dc..1f48de343d4d 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -71,7 +71,7 @@ enum range_index { RANGE_EXT = 0x80000000, /* Extended */ }; -#define CPUID_INDEX_MASK 0x80000000 +#define CPUID_INDEX_MASK 0xffff0000 #define CPUID_FUNCTION_MASK (~CPUID_INDEX_MASK) struct cpuid_range { @@ -173,7 +173,7 @@ static bool cpuid_store(struct cpuid_range *range, u32 f, int subleaf, * Cut off vendor-prefix from CPUID function as we're using it as an * index into ->funcs. */ - func = &range->funcs[f & 0xffff]; + func = &range->funcs[f & CPUID_FUNCTION_MASK]; if (!func->leafs) { func->leafs = malloc(sizeof(struct subleaf)); @@ -228,7 +228,7 @@ void setup_cpuid_range(struct cpuid_range *range) cpuid(range->index, max_func, ebx, ecx, edx); - idx_func = (max_func & 0xffff) + 1; + idx_func = (max_func & CPUID_FUNCTION_MASK) + 1; range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); if (!range->funcs) err(EXIT_FAILURE, NULL); @@ -512,7 +512,7 @@ static inline struct cpuid_func *index_to_func(u32 index) if (!range) return NULL; - func_idx = index & 0xffff; + func_idx = index & CPUID_FUNCTION_MASK; if ((func_idx + 1) > (u32)range->nr) return NULL; From 74d29127f83042500c20c903dd67151dbdd86ec8 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:34 +0100 Subject: [PATCH 013/101] tools/x86/kcpuid: Consolidate index validity checks Let index_to_cpuid_range() return a CPUID range only if the passed index is within a CPUID range's maximum supported function on the CPU. Returning a CPUID range that is invalid on the CPU for the passed index does not make sense. This also avoids repeating the "function index is within CPUID range" checks, both at setup_cpuid_range() and index_to_func(). Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-14-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index 1f48de343d4d..ac3d36bef85f 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -101,10 +101,12 @@ static char *range_to_str(struct cpuid_range *range) struct cpuid_range *index_to_cpuid_range(u32 index) { + u32 func_idx = index & CPUID_FUNCTION_MASK; + u32 range_idx = index & CPUID_INDEX_MASK; struct cpuid_range *range; for_each_cpuid_range(range) { - if (range->index == (index & CPUID_INDEX_MASK)) + if (range->index == range_idx && (u32)range->nr > func_idx) return range; } @@ -331,17 +333,16 @@ static void parse_line(char *line) /* index/main-leaf */ index = strtoull(tokens[0], NULL, 0); - /* Skip line parsing if it's not covered by known ranges */ + /* + * Skip line parsing if the index is not covered by known-valid + * CPUID ranges on this CPU. + */ range = index_to_cpuid_range(index); if (!range) return; - /* Skip line parsing for non-existing indexes */ - index &= CPUID_FUNCTION_MASK; - if ((int)index >= range->nr) - return; - /* Skip line parsing if the index CPUID output is all zero */ + index &= CPUID_FUNCTION_MASK; func = &range->funcs[index]; if (!func->nr) return; @@ -505,17 +506,13 @@ static void show_range(struct cpuid_range *range) static inline struct cpuid_func *index_to_func(u32 index) { + u32 func_idx = index & CPUID_FUNCTION_MASK; struct cpuid_range *range; - u32 func_idx; range = index_to_cpuid_range(index); if (!range) return NULL; - func_idx = index & CPUID_FUNCTION_MASK; - if ((func_idx + 1) > (u32)range->nr) - return NULL; - return &range->funcs[func_idx]; } From 72383c8274edf0a1736973462603dcfd0a088bb9 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:35 +0100 Subject: [PATCH 014/101] tools/x86/kcpuid: Filter valid CPUID ranges Next commits will introduce vendor-specific CPUID ranges like Transmeta's 0x8086000 range and Centaur's 0xc0000000. Initially explicit vendor detection was implemented, but it turned out to be not strictly necessary. As Dave Hansen noted, even established tools like cpuid(1) just tries all ranges indices, and see if the CPU responds back with something sensible. Do something similar at setup_cpuid_range(). Query the range's index, and check the maximum range function value returned. If it's within an expected interval of [range_index, range_index + MAX_RANGE_INDEX_OFFSET], accept the range as valid and further query its leaves. Set MAX_RANGE_INDEX_OFFSET to a heuristic of 0xff. That should be sensible enough since all the ranges covered by x86-cpuid-db XML database are: 0x00000000 0x00000023 0x40000000 0x40000000 0x80000000 0x80000026 0x80860000 0x80860007 0xc0000000 0xc0000001 At setup_cpuid_range(), if the range's returned maximum function was not sane, mark it as invalid by setting its number of leaves, range->nr, to zero. Introduce the for_each_valid_cpuid_range() iterator instead of sprinkling "range->nr != 0" checks throughout the code. Suggested-by: Dave Hansen Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-15-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 37 +++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index ac3d36bef85f..fe3d0581afd1 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -96,8 +96,13 @@ static char *range_to_str(struct cpuid_range *range) } } -#define for_each_cpuid_range(range) \ - for (unsigned int i = 0; i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]); i++) +#define __for_each_cpuid_range(range, __condition) \ + for (unsigned int i = 0; \ + i < ARRAY_SIZE(ranges) && ((range) = &ranges[i]) && (__condition); \ + i++) + +#define for_each_valid_cpuid_range(range) __for_each_cpuid_range(range, (range)->nr != 0) +#define for_each_cpuid_range(range) __for_each_cpuid_range(range, true) struct cpuid_range *index_to_cpuid_range(u32 index) { @@ -105,7 +110,7 @@ struct cpuid_range *index_to_cpuid_range(u32 index) u32 range_idx = index & CPUID_INDEX_MASK; struct cpuid_range *range; - for_each_cpuid_range(range) { + for_each_valid_cpuid_range(range) { if (range->index == range_idx && (u32)range->nr > func_idx) return range; } @@ -223,20 +228,32 @@ static void raw_dump_range(struct cpuid_range *range) } #define MAX_SUBLEAF_NUM 64 +#define MAX_RANGE_INDEX_OFFSET 0xff void setup_cpuid_range(struct cpuid_range *range) { - u32 max_func, idx_func; + u32 max_func, range_funcs_sz; u32 eax, ebx, ecx, edx; cpuid(range->index, max_func, ebx, ecx, edx); - idx_func = (max_func & CPUID_FUNCTION_MASK) + 1; - range->funcs = malloc(sizeof(struct cpuid_func) * idx_func); + /* + * If the CPUID range's maximum function value is garbage, then it + * is not recognized by this CPU. Set the range's number of valid + * leaves to zero so that for_each_valid_cpu_range() can ignore it. + */ + if (max_func < range->index || max_func > (range->index + MAX_RANGE_INDEX_OFFSET)) { + range->nr = 0; + return; + } + + range->nr = (max_func & CPUID_FUNCTION_MASK) + 1; + range_funcs_sz = range->nr * sizeof(struct cpuid_func); + + range->funcs = malloc(range_funcs_sz); if (!range->funcs) err(EXIT_FAILURE, NULL); - range->nr = idx_func; - memset(range->funcs, 0, sizeof(struct cpuid_func) * idx_func); + memset(range->funcs, 0, range_funcs_sz); for (u32 f = range->index; f <= max_func; f++) { u32 max_subleaf = MAX_SUBLEAF_NUM; @@ -523,7 +540,7 @@ static void show_info(void) if (show_raw) { /* Show all of the raw output of 'cpuid' instr */ - for_each_cpuid_range(range) + for_each_valid_cpuid_range(range) raw_dump_range(range); return; } @@ -552,7 +569,7 @@ static void show_info(void) } printf("CPU features:\n=============\n\n"); - for_each_cpuid_range(range) + for_each_valid_cpuid_range(range) show_range(range); } From 87669e74d8481a066eb8d319cef7b2ea93d4e24b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:36 +0100 Subject: [PATCH 015/101] tools/x86/kcpuid: Define Transmeta and Centaur index ranges Explicitly define the CPUID index ranges for Transmeta (0x80860000) and Centaur/Zhaoxin (0xc0000000). Without these explicit definitions, their respective CPUID indices would be skipped during CSV bitfield parsing. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-16-darwi@linutronix.de --- tools/arch/x86/kcpuid/kcpuid.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/arch/x86/kcpuid/kcpuid.c b/tools/arch/x86/kcpuid/kcpuid.c index fe3d0581afd1..7dc6b9235d02 100644 --- a/tools/arch/x86/kcpuid/kcpuid.c +++ b/tools/arch/x86/kcpuid/kcpuid.c @@ -69,6 +69,8 @@ struct cpuid_func { enum range_index { RANGE_STD = 0, /* Standard */ RANGE_EXT = 0x80000000, /* Extended */ + RANGE_TSM = 0x80860000, /* Transmeta */ + RANGE_CTR = 0xc0000000, /* Centaur/Zhaoxin */ }; #define CPUID_INDEX_MASK 0xffff0000 @@ -85,6 +87,8 @@ struct cpuid_range { static struct cpuid_range ranges[] = { { .index = RANGE_STD, }, { .index = RANGE_EXT, }, + { .index = RANGE_TSM, }, + { .index = RANGE_CTR, }, }; static char *range_to_str(struct cpuid_range *range) @@ -92,6 +96,8 @@ static char *range_to_str(struct cpuid_range *range) switch (range->index) { case RANGE_STD: return "Standard"; case RANGE_EXT: return "Extended"; + case RANGE_TSM: return "Transmeta"; + case RANGE_CTR: return "Centaur"; default: return NULL; } } From e1dde2f5a4ef4a0e91a67adebb7037bbfbcb0c50 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:37 +0100 Subject: [PATCH 016/101] tools/x86/kcpuid: Update bitfields to x86-cpuid-db v2.0 Update kcpuid's CSV file to version v2.0, as generated by x86-cpuid-db. Summary of the v2.0 changes: * Introduce the leaves: - Leaf 0x00000003, Transmeta Processor serial number - Leaf 0x80860000, Transmeta max leaf number + CPU vendor ID - Leaf 0x80860001, Transmeta extended CPU information - Leaf 0x80860002, Transmeta Code Morphing Software (CMS) enumeration - Leaf 0x80860003 => 0x80860006, Transmeta CPU information string - Leaf 0x80860007, Transmeta "live" CPU information - Leaf 0xc0000000, Centaur/Zhaoxin's max leaf number - Leaf 0xc0000001, Centaur/Zhaoxin's extended CPU features * Add a 0x prefix for leaves 0x0 to 0x9. This maintains consistency with the rest of the CSV entries. * Add the new bitfields: - Leaf 0x7: nmi_src, NMI-source reporting - Leaf 0x80000001: e_base_type and e_mmx (Transmeta) * Update the section headers for leaves 0x80000000 and 0x80000005 to indicate that they are also valid for Transmeta. Notes: Leaf 0x3, being not unique to Transmeta, is handled at the generated CSV file v2.3 update, later in this patch queue. Leaf 0x80000001 EDX:23 bit, e_mmx, is also available on AMD. A bugfix is already merged at x86-cpuid-db's -tip for that, and it will be part of the project's upcoming v2.4 release.: https://gitlab.com/x86-cpuid.org/x86-cpuid-db/-/commit/65fff25daa41 Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://gitlab.com/x86-cpuid.org/x86-cpuid-db/-/blob/v2.0/CHANGELOG.rst Link: https://lore.kernel.org/r/20250324142042.29010-17-darwi@linutronix.de --- tools/arch/x86/kcpuid/cpuid.csv | 648 +++++++++++++++++++------------- 1 file changed, 382 insertions(+), 266 deletions(-) diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index d751eb8585d0..d0f7159f99ba 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -1,5 +1,5 @@ # SPDX-License-Identifier: CC0-1.0 -# Generator: x86-cpuid-db v1.0 +# Generator: x86-cpuid-db v2.0 # # Auto-generated file. @@ -12,297 +12,306 @@ # Leaf 0H # Maximum standard leaf number + CPU vendor string - 0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported - 0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 - 0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 - 0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 + 0x0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported + 0x0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 + 0x0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 + 0x0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 # Leaf 1H # CPU FMS (Family/Model/Stepping) + standard feature flags - 1, 0, eax, 3:0, stepping , Stepping ID - 1, 0, eax, 7:4, base_model , Base CPU model ID - 1, 0, eax, 11:8, base_family_id , Base CPU family ID - 1, 0, eax, 13:12, cpu_type , CPU type - 1, 0, eax, 19:16, ext_model , Extended CPU model ID - 1, 0, eax, 27:20, ext_family , Extended CPU family ID - 1, 0, ebx, 7:0, brand_id , Brand index - 1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size - 1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count - 1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID - 1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) - 1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support - 1, 0, ecx, 2, dtes64 , 64-bit DS save area - 1, 0, ecx, 3, monitor , MONITOR/MWAIT support - 1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store - 1, 0, ecx, 5, vmx , Virtual Machine Extensions - 1, 0, ecx, 6, smx , Safer Mode Extensions - 1, 0, ecx, 7, est , Enhanced Intel SpeedStep - 1, 0, ecx, 8, tm2 , Thermal Monitor 2 - 1, 0, ecx, 9, ssse3 , Supplemental SSE3 - 1, 0, ecx, 10, cid , L1 Context ID - 1, 0, ecx, 11, sdbg , Sillicon Debug - 1, 0, ecx, 12, fma , FMA extensions using YMM state - 1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support - 1, 0, ecx, 14, xtpr , xTPR Update Control - 1, 0, ecx, 15, pdcm , Perfmon and Debug Capability - 1, 0, ecx, 17, pcid , Process-context identifiers - 1, 0, ecx, 18, dca , Direct Cache Access - 1, 0, ecx, 19, sse4_1 , SSE4.1 - 1, 0, ecx, 20, sse4_2 , SSE4.2 - 1, 0, ecx, 21, x2apic , X2APIC support - 1, 0, ecx, 22, movbe , MOVBE instruction support - 1, 0, ecx, 23, popcnt , POPCNT instruction support - 1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation - 1, 0, ecx, 25, aes , AES instructions - 1, 0, ecx, 26, xsave , XSAVE (and related instructions) support - 1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS - 1, 0, ecx, 28, avx , AVX instructions support - 1, 0, ecx, 29, f16c , Half-precision floating-point conversion support - 1, 0, ecx, 30, rdrand , RDRAND instruction support - 1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system - 1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87) - 1, 0, edx, 1, vme , Virtual-8086 Mode Extensions - 1, 0, edx, 2, de , Debugging Extensions - 1, 0, edx, 3, pse , Page Size Extension - 1, 0, edx, 4, tsc , Time Stamp Counter - 1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support) - 1, 0, edx, 6, pae , Physical Address Extensions - 1, 0, edx, 7, mce , Machine Check Exception - 1, 0, edx, 8, cx8 , CMPXCHG8B instruction - 1, 0, edx, 9, apic , APIC on-chip - 1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs - 1, 0, edx, 12, mtrr , Memory Type Range Registers - 1, 0, edx, 13, pge , Page Global Extensions - 1, 0, edx, 14, mca , Machine Check Architecture - 1, 0, edx, 15, cmov , Conditional Move Instruction - 1, 0, edx, 16, pat , Page Attribute Table - 1, 0, edx, 17, pse36 , Page Size Extension (36-bit) - 1, 0, edx, 18, pn , Processor Serial Number - 1, 0, edx, 19, clflush , CLFLUSH instruction - 1, 0, edx, 21, dts , Debug Store - 1, 0, edx, 22, acpi , Thermal monitor and clock control - 1, 0, edx, 23, mmx , MMX instructions - 1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions - 1, 0, edx, 25, sse , SSE instructions - 1, 0, edx, 26, sse2 , SSE2 instructions - 1, 0, edx, 27, ss , Self Snoop - 1, 0, edx, 28, ht , Hyper-threading - 1, 0, edx, 29, tm , Thermal Monitor - 1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved - 1, 0, edx, 31, pbe , Pending Break Enable + 0x1, 0, eax, 3:0, stepping , Stepping ID + 0x1, 0, eax, 7:4, base_model , Base CPU model ID + 0x1, 0, eax, 11:8, base_family_id , Base CPU family ID + 0x1, 0, eax, 13:12, cpu_type , CPU type + 0x1, 0, eax, 19:16, ext_model , Extended CPU model ID + 0x1, 0, eax, 27:20, ext_family , Extended CPU family ID + 0x1, 0, ebx, 7:0, brand_id , Brand index + 0x1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size + 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count + 0x1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID + 0x1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) + 0x1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support + 0x1, 0, ecx, 2, dtes64 , 64-bit DS save area + 0x1, 0, ecx, 3, monitor , MONITOR/MWAIT support + 0x1, 0, ecx, 4, ds_cpl , CPL Qualified Debug Store + 0x1, 0, ecx, 5, vmx , Virtual Machine Extensions + 0x1, 0, ecx, 6, smx , Safer Mode Extensions + 0x1, 0, ecx, 7, est , Enhanced Intel SpeedStep + 0x1, 0, ecx, 8, tm2 , Thermal Monitor 2 + 0x1, 0, ecx, 9, ssse3 , Supplemental SSE3 + 0x1, 0, ecx, 10, cid , L1 Context ID + 0x1, 0, ecx, 11, sdbg , Sillicon Debug + 0x1, 0, ecx, 12, fma , FMA extensions using YMM state + 0x1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support + 0x1, 0, ecx, 14, xtpr , xTPR Update Control + 0x1, 0, ecx, 15, pdcm , Perfmon and Debug Capability + 0x1, 0, ecx, 17, pcid , Process-context identifiers + 0x1, 0, ecx, 18, dca , Direct Cache Access + 0x1, 0, ecx, 19, sse4_1 , SSE4.1 + 0x1, 0, ecx, 20, sse4_2 , SSE4.2 + 0x1, 0, ecx, 21, x2apic , X2APIC support + 0x1, 0, ecx, 22, movbe , MOVBE instruction support + 0x1, 0, ecx, 23, popcnt , POPCNT instruction support + 0x1, 0, ecx, 24, tsc_deadline_timer , APIC timer one-shot operation + 0x1, 0, ecx, 25, aes , AES instructions + 0x1, 0, ecx, 26, xsave , XSAVE (and related instructions) support + 0x1, 0, ecx, 27, osxsave , XSAVE (and related instructions) are enabled by OS + 0x1, 0, ecx, 28, avx , AVX instructions support + 0x1, 0, ecx, 29, f16c , Half-precision floating-point conversion support + 0x1, 0, ecx, 30, rdrand , RDRAND instruction support + 0x1, 0, ecx, 31, guest_status , System is running as guest; (para-)virtualized system + 0x1, 0, edx, 0, fpu , Floating-Point Unit on-chip (x87) + 0x1, 0, edx, 1, vme , Virtual-8086 Mode Extensions + 0x1, 0, edx, 2, de , Debugging Extensions + 0x1, 0, edx, 3, pse , Page Size Extension + 0x1, 0, edx, 4, tsc , Time Stamp Counter + 0x1, 0, edx, 5, msr , Model-Specific Registers (RDMSR and WRMSR support) + 0x1, 0, edx, 6, pae , Physical Address Extensions + 0x1, 0, edx, 7, mce , Machine Check Exception + 0x1, 0, edx, 8, cx8 , CMPXCHG8B instruction + 0x1, 0, edx, 9, apic , APIC on-chip + 0x1, 0, edx, 11, sep , SYSENTER, SYSEXIT, and associated MSRs + 0x1, 0, edx, 12, mtrr , Memory Type Range Registers + 0x1, 0, edx, 13, pge , Page Global Extensions + 0x1, 0, edx, 14, mca , Machine Check Architecture + 0x1, 0, edx, 15, cmov , Conditional Move Instruction + 0x1, 0, edx, 16, pat , Page Attribute Table + 0x1, 0, edx, 17, pse36 , Page Size Extension (36-bit) + 0x1, 0, edx, 18, pn , Processor Serial Number + 0x1, 0, edx, 19, clflush , CLFLUSH instruction + 0x1, 0, edx, 21, dts , Debug Store + 0x1, 0, edx, 22, acpi , Thermal monitor and clock control + 0x1, 0, edx, 23, mmx , MMX instructions + 0x1, 0, edx, 24, fxsr , FXSAVE and FXRSTOR instructions + 0x1, 0, edx, 25, sse , SSE instructions + 0x1, 0, edx, 26, sse2 , SSE2 instructions + 0x1, 0, edx, 27, ss , Self Snoop + 0x1, 0, edx, 28, ht , Hyper-threading + 0x1, 0, edx, 29, tm , Thermal Monitor + 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved + 0x1, 0, edx, 31, pbe , Pending Break Enable # Leaf 2H # Intel cache and TLB information one-byte descriptors - 2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried - 2, 0, eax, 15:8, desc1 , Descriptor #1 - 2, 0, eax, 23:16, desc2 , Descriptor #2 - 2, 0, eax, 30:24, desc3 , Descriptor #3 - 2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set - 2, 0, ebx, 7:0, desc4 , Descriptor #4 - 2, 0, ebx, 15:8, desc5 , Descriptor #5 - 2, 0, ebx, 23:16, desc6 , Descriptor #6 - 2, 0, ebx, 30:24, desc7 , Descriptor #7 - 2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set - 2, 0, ecx, 7:0, desc8 , Descriptor #8 - 2, 0, ecx, 15:8, desc9 , Descriptor #9 - 2, 0, ecx, 23:16, desc10 , Descriptor #10 - 2, 0, ecx, 30:24, desc11 , Descriptor #11 - 2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set - 2, 0, edx, 7:0, desc12 , Descriptor #12 - 2, 0, edx, 15:8, desc13 , Descriptor #13 - 2, 0, edx, 23:16, desc14 , Descriptor #14 - 2, 0, edx, 30:24, desc15 , Descriptor #15 - 2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set + 0x2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried + 0x2, 0, eax, 15:8, desc1 , Descriptor #1 + 0x2, 0, eax, 23:16, desc2 , Descriptor #2 + 0x2, 0, eax, 30:24, desc3 , Descriptor #3 + 0x2, 0, eax, 31, eax_invalid , Descriptors 1-3 are invalid if set + 0x2, 0, ebx, 7:0, desc4 , Descriptor #4 + 0x2, 0, ebx, 15:8, desc5 , Descriptor #5 + 0x2, 0, ebx, 23:16, desc6 , Descriptor #6 + 0x2, 0, ebx, 30:24, desc7 , Descriptor #7 + 0x2, 0, ebx, 31, ebx_invalid , Descriptors 4-7 are invalid if set + 0x2, 0, ecx, 7:0, desc8 , Descriptor #8 + 0x2, 0, ecx, 15:8, desc9 , Descriptor #9 + 0x2, 0, ecx, 23:16, desc10 , Descriptor #10 + 0x2, 0, ecx, 30:24, desc11 , Descriptor #11 + 0x2, 0, ecx, 31, ecx_invalid , Descriptors 8-11 are invalid if set + 0x2, 0, edx, 7:0, desc12 , Descriptor #12 + 0x2, 0, edx, 15:8, desc13 , Descriptor #13 + 0x2, 0, edx, 23:16, desc14 , Descriptor #14 + 0x2, 0, edx, 30:24, desc15 , Descriptor #15 + 0x2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set + +# Leaf 3H +# Transmeta Processor Serial Number (PSN) + + 0x3, 0, eax, 31:0, cpu_psn_0 , Processor Serial Number bytes 0 - 3 + 0x3, 0, ebx, 31:0, cpu_psn_1 , Processor Serial Number bytes 4 - 7 + 0x3, 0, ecx, 31:0, cpu_psn_2 , Processor Serial Number bytes 8 - 11 + 0x3, 0, edx, 31:0, cpu_psn_3 , Processor Serial Number bytes 12 - 15 # Leaf 4H # Intel deterministic cache parameters - 4, 31:0, eax, 4:0, cache_type , Cache type field - 4, 31:0, eax, 7:5, cache_level , Cache level (1-based) - 4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level - 4, 31:0, eax, 9, fully_associative , Fully-associative cache - 4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache - 4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package - 4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) - 4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) - 4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) - 4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) - 4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches - 4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches - 4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function) + 0x4, 31:0, eax, 4:0, cache_type , Cache type field + 0x4, 31:0, eax, 7:5, cache_level , Cache level (1-based) + 0x4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level + 0x4, 31:0, eax, 9, fully_associative , Fully-associative cache + 0x4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache + 0x4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package + 0x4, 31:0, ebx, 11:0, cache_linesize , System coherency line size (0-based) + 0x4, 31:0, ebx, 21:12, cache_npartitions , Physical line partitions (0-based) + 0x4, 31:0, ebx, 31:22, cache_nways , Ways of associativity (0-based) + 0x4, 31:0, ecx, 30:0, cache_nsets , Cache number of sets (0-based) + 0x4, 31:0, edx, 0, wbinvd_rll_no_guarantee, WBINVD/INVD not guaranteed for Remote Lower-Level caches + 0x4, 31:0, edx, 1, ll_inclusive , Cache is inclusive of Lower-Level caches + 0x4, 31:0, edx, 2, complex_indexing , Not a direct-mapped cache (complex function) # Leaf 5H # MONITOR/MWAIT instructions enumeration - 5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes - 5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes - 5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported - 5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported - 5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT - 5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT - 5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT - 5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT - 5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT - 5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT - 5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT - 5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT + 0x5, 0, eax, 15:0, min_mon_size , Smallest monitor-line size, in bytes + 0x5, 0, ebx, 15:0, max_mon_size , Largest monitor-line size, in bytes + 0x5, 0, ecx, 0, mwait_ext , Enumeration of MONITOR/MWAIT extensions is supported + 0x5, 0, ecx, 1, mwait_irq_break , Interrupts as a break-event for MWAIT is supported + 0x5, 0, edx, 3:0, n_c0_substates , Number of C0 sub C-states supported using MWAIT + 0x5, 0, edx, 7:4, n_c1_substates , Number of C1 sub C-states supported using MWAIT + 0x5, 0, edx, 11:8, n_c2_substates , Number of C2 sub C-states supported using MWAIT + 0x5, 0, edx, 15:12, n_c3_substates , Number of C3 sub C-states supported using MWAIT + 0x5, 0, edx, 19:16, n_c4_substates , Number of C4 sub C-states supported using MWAIT + 0x5, 0, edx, 23:20, n_c5_substates , Number of C5 sub C-states supported using MWAIT + 0x5, 0, edx, 27:24, n_c6_substates , Number of C6 sub C-states supported using MWAIT + 0x5, 0, edx, 31:28, n_c7_substates , Number of C7 sub C-states supported using MWAIT # Leaf 6H # Thermal and Power Management enumeration - 6, 0, eax, 0, dtherm , Digital temprature sensor - 6, 0, eax, 1, turbo_boost , Intel Turbo Boost - 6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) - 6, 0, eax, 4, pln , Power Limit Notification (PLN) event - 6, 0, eax, 5, ecmd , Clock modulation duty cycle extension - 6, 0, eax, 6, pts , Package thermal management - 6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported - 6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR) - 6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported - 6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference - 6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request - 6, 0, eax, 13, hdc_base_regs , HDC base registers are supported - 6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0 - 6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change - 6, 0, eax, 16, hwp_peci_override , HWP PECI override - 6, 0, eax, 17, hwp_flexible , Flexible HWP - 6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode - 6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported - 6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported - 6, 0, eax, 23, thread_director , Intel thread director support - 6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported - 6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds - 6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface) - 6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support - 6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director - 6, 0, edx, 0, perfcap_reporting , Performance capability reporting - 6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting - 6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages - 6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based + 0x6, 0, eax, 0, dtherm , Digital temprature sensor + 0x6, 0, eax, 1, turbo_boost , Intel Turbo Boost + 0x6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) + 0x6, 0, eax, 4, pln , Power Limit Notification (PLN) event + 0x6, 0, eax, 5, ecmd , Clock modulation duty cycle extension + 0x6, 0, eax, 6, pts , Package thermal management + 0x6, 0, eax, 7, hwp , HWP (Hardware P-states) base registers are supported + 0x6, 0, eax, 8, hwp_notify , HWP notification (IA32_HWP_INTERRUPT MSR) + 0x6, 0, eax, 9, hwp_act_window , HWP activity window (IA32_HWP_REQUEST[bits 41:32]) supported + 0x6, 0, eax, 10, hwp_epp , HWP Energy Performance Preference + 0x6, 0, eax, 11, hwp_pkg_req , HWP Package Level Request + 0x6, 0, eax, 13, hdc_base_regs , HDC base registers are supported + 0x6, 0, eax, 14, turbo_boost_3_0 , Intel Turbo Boost Max 3.0 + 0x6, 0, eax, 15, hwp_capabilities , HWP Highest Performance change + 0x6, 0, eax, 16, hwp_peci_override , HWP PECI override + 0x6, 0, eax, 17, hwp_flexible , Flexible HWP + 0x6, 0, eax, 18, hwp_fast , IA32_HWP_REQUEST MSR fast access mode + 0x6, 0, eax, 19, hfi , HW_FEEDBACK MSRs supported + 0x6, 0, eax, 20, hwp_ignore_idle , Ignoring idle logical CPU HWP req is supported + 0x6, 0, eax, 23, thread_director , Intel thread director support + 0x6, 0, eax, 24, therm_interrupt_bit25 , IA32_THERM_INTERRUPT MSR bit 25 is supported + 0x6, 0, ebx, 3:0, n_therm_thresholds , Digital thermometer thresholds + 0x6, 0, ecx, 0, aperfmperf , MPERF/APERF MSRs (effective frequency interface) + 0x6, 0, ecx, 3, epb , IA32_ENERGY_PERF_BIAS MSR support + 0x6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director + 0x6, 0, edx, 0, perfcap_reporting , Performance capability reporting + 0x6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting + 0x6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages + 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based # Leaf 7H # Extended CPU features enumeration - 7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves - 7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support - 7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported - 7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) - 7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1 - 7, 0, ebx, 4, hle , Hardware Lock Elision - 7, 0, ebx, 5, avx2 , AVX2 instruction set - 7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions - 7, 0, ebx, 7, smep , Supervisor Mode Execution Protection - 7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2 - 7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB - 7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID) - 7, 0, ebx, 11, rtm , Intel restricted transactional memory - 7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring - 7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) - 7, 0, ebx, 14, mpx , Intel memory protection extensions - 7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent - 7, 0, ebx, 16, avx512f , AVX-512 foundation instructions - 7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions - 7, 0, ebx, 18, rdseed , RDSEED instruction - 7, 0, ebx, 19, adx , ADCX/ADOX instructions - 7, 0, ebx, 20, smap , Supervisor mode access prevention - 7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add - 7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction - 7, 0, ebx, 24, clwb , CLWB instruction - 7, 0, ebx, 25, intel_pt , Intel processor trace - 7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions - 7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs - 7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs - 7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions - 7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions - 7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions - 7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) - 7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs - 7, 0, ecx, 2, umip , User mode instruction protection - 7, 0, ecx, 3, pku , Protection keys for user-space - 7, 0, ecx, 4, ospke , OS protection keys enable - 7, 0, ecx, 5, waitpkg , WAITPKG instructions - 7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2 - 7, 0, ecx, 7, cet_ss , CET shadow stack features - 7, 0, ecx, 8, gfni , Galois field new instructions - 7, 0, ecx, 9, vaes , Vector AES instrs - 7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support - 7, 0, ecx, 11, avx512_vnni , Vector neural network instructions - 7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle - 7, 0, ecx, 13, tme , Intel total memory encryption - 7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW - 7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging) - 7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode - 7, 0, ecx, 22, rdpid , RDPID instruction - 7, 0, ecx, 23, key_locker , Intel key locker support - 7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection - 7, 0, ecx, 25, cldemote , CLDEMOTE instruction - 7, 0, ecx, 27, movdiri , MOVDIRI instruction - 7, 0, ecx, 28, movdir64b , MOVDIR64B instruction - 7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S}) - 7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration - 7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages - 7, 0, edx, 1, sgx_keys , Intel SGX attestation services - 7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions - 7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision - 7, 0, edx, 4, fsrm , Fast short REP MOV - 7, 0, edx, 5, uintr , CPU supports user interrupts - 7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions - 7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available - 7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support - 7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts - 7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported - 7, 0, edx, 14, serialize , SERIALIZE instruction - 7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part' - 7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking - 7, 0, edx, 18, pconfig , PCONFIG instruction - 7, 0, edx, 19, arch_lbr , Intel architectural LBRs - 7, 0, edx, 20, ibt , CET indirect branch tracking - 7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support - 7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions - 7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support - 7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support - 7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions) - 7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors - 7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR - 7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR - 7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR - 7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable - 7, 1, eax, 4, avx_vnni , AVX-VNNI instructions - 7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions - 7, 1, eax, 6, lass , Linear address space separation - 7, 1, eax, 7, cmpccxadd , CMPccXADD instructions - 7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported - 7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB - 7, 1, eax, 11, fsrs , Fast short REP STOSB - 7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB - 7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions - 7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS - 7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing) - 7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations - 7, 1, eax, 22, hreset , History reset support - 7, 1, eax, 23, avx_ifma , Integer fused multiply add - 7, 1, eax, 26, lam , Linear address masking - 7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions - 7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs) - 7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions - 7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions - 7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids) - 7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions - 7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use - 7, 2, edx, 0, intel_psfd , Intel predictive store forward disable - 7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S} - 7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S} - 7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U - 7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S - 7, 2, edx, 5, mcdt_no , MCDT mitigation not needed - 7, 2, edx, 6, uclock_disable , UC-lock disable is supported + 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves + 0x7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support + 0x7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported + 0x7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) + 0x7, 0, ebx, 3, bmi1 , Bit manipulation extensions group 1 + 0x7, 0, ebx, 4, hle , Hardware Lock Elision + 0x7, 0, ebx, 5, avx2 , AVX2 instruction set + 0x7, 0, ebx, 6, fdp_excptn_only , FPU Data Pointer updated only on x87 exceptions + 0x7, 0, ebx, 7, smep , Supervisor Mode Execution Protection + 0x7, 0, ebx, 8, bmi2 , Bit manipulation extensions group 2 + 0x7, 0, ebx, 9, erms , Enhanced REP MOVSB/STOSB + 0x7, 0, ebx, 10, invpcid , INVPCID instruction (Invalidate Processor Context ID) + 0x7, 0, ebx, 11, rtm , Intel restricted transactional memory + 0x7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring + 0x7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) + 0x7, 0, ebx, 14, mpx , Intel memory protection extensions + 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent + 0x7, 0, ebx, 16, avx512f , AVX-512 foundation instructions + 0x7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions + 0x7, 0, ebx, 18, rdseed , RDSEED instruction + 0x7, 0, ebx, 19, adx , ADCX/ADOX instructions + 0x7, 0, ebx, 20, smap , Supervisor mode access prevention + 0x7, 0, ebx, 21, avx512ifma , AVX-512 integer fused multiply add + 0x7, 0, ebx, 23, clflushopt , CLFLUSHOPT instruction + 0x7, 0, ebx, 24, clwb , CLWB instruction + 0x7, 0, ebx, 25, intel_pt , Intel processor trace + 0x7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions + 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs + 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs + 0x7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions + 0x7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions + 0x7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions + 0x7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) + 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs + 0x7, 0, ecx, 2, umip , User mode instruction protection + 0x7, 0, ecx, 3, pku , Protection keys for user-space + 0x7, 0, ecx, 4, ospke , OS protection keys enable + 0x7, 0, ecx, 5, waitpkg , WAITPKG instructions + 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2 + 0x7, 0, ecx, 7, cet_ss , CET shadow stack features + 0x7, 0, ecx, 8, gfni , Galois field new instructions + 0x7, 0, ecx, 9, vaes , Vector AES instrs + 0x7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support + 0x7, 0, ecx, 11, avx512_vnni , Vector neural network instructions + 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle + 0x7, 0, ecx, 13, tme , Intel total memory encryption + 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW + 0x7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging) + 0x7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode + 0x7, 0, ecx, 22, rdpid , RDPID instruction + 0x7, 0, ecx, 23, key_locker , Intel key locker support + 0x7, 0, ecx, 24, bus_lock_detect , OS bus-lock detection + 0x7, 0, ecx, 25, cldemote , CLDEMOTE instruction + 0x7, 0, ecx, 27, movdiri , MOVDIRI instruction + 0x7, 0, ecx, 28, movdir64b , MOVDIR64B instruction + 0x7, 0, ecx, 29, enqcmd , Enqueue stores supported (ENQCMD{,S}) + 0x7, 0, ecx, 30, sgx_lc , Intel SGX launch configuration + 0x7, 0, ecx, 31, pks , Protection keys for supervisor-mode pages + 0x7, 0, edx, 1, sgx_keys , Intel SGX attestation services + 0x7, 0, edx, 2, avx512_4vnniw , AVX-512 neural network instructions + 0x7, 0, edx, 3, avx512_4fmaps , AVX-512 multiply accumulation single precision + 0x7, 0, edx, 4, fsrm , Fast short REP MOV + 0x7, 0, edx, 5, uintr , CPU supports user interrupts + 0x7, 0, edx, 8, avx512_vp2intersect , VP2INTERSECT{D,Q} instructions + 0x7, 0, edx, 9, srdbs_ctrl , SRBDS mitigation MSR available + 0x7, 0, edx, 10, md_clear , VERW MD_CLEAR microcode support + 0x7, 0, edx, 11, rtm_always_abort , XBEGIN (RTM transaction) always aborts + 0x7, 0, edx, 13, tsx_force_abort , MSR TSX_FORCE_ABORT, RTM_ABORT bit, supported + 0x7, 0, edx, 14, serialize , SERIALIZE instruction + 0x7, 0, edx, 15, hybrid_cpu , The CPU is identified as a 'hybrid part' + 0x7, 0, edx, 16, tsxldtrk , TSX suspend/resume load address tracking + 0x7, 0, edx, 18, pconfig , PCONFIG instruction + 0x7, 0, edx, 19, arch_lbr , Intel architectural LBRs + 0x7, 0, edx, 20, ibt , CET indirect branch tracking + 0x7, 0, edx, 22, amx_bf16 , AMX-BF16: tile bfloat16 support + 0x7, 0, edx, 23, avx512_fp16 , AVX-512 FP16 instructions + 0x7, 0, edx, 24, amx_tile , AMX-TILE: tile architecture support + 0x7, 0, edx, 25, amx_int8 , AMX-INT8: tile 8-bit integer support + 0x7, 0, edx, 26, spec_ctrl , Speculation Control (IBRS/IBPB: indirect branch restrictions) + 0x7, 0, edx, 27, intel_stibp , Single thread indirect branch predictors + 0x7, 0, edx, 28, flush_l1d , FLUSH L1D cache: IA32_FLUSH_CMD MSR + 0x7, 0, edx, 29, arch_capabilities , Intel IA32_ARCH_CAPABILITIES MSR + 0x7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR + 0x7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable + 0x7, 1, eax, 4, avx_vnni , AVX-VNNI instructions + 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions + 0x7, 1, eax, 6, lass , Linear address space separation + 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions + 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported + 0x7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB + 0x7, 1, eax, 11, fsrs , Fast short REP STOSB + 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB + 0x7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions + 0x7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS + 0x7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing) + 0x7, 1, eax, 20, nmi_src , NMI-source reporting with FRED event data + 0x7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations + 0x7, 1, eax, 22, hreset , History reset support + 0x7, 1, eax, 23, avx_ifma , Integer fused multiply add + 0x7, 1, eax, 26, lam , Linear address masking + 0x7, 1, eax, 27, rd_wr_msrlist , RDMSRLIST/WRMSRLIST instructions + 0x7, 1, ebx, 0, intel_ppin , Protected processor inventory number (PPIN{,_CTL} MSRs) + 0x7, 1, edx, 4, avx_vnni_int8 , AVX-VNNI-INT8 instructions + 0x7, 1, edx, 5, avx_ne_convert , AVX-NE-CONVERT instructions + 0x7, 1, edx, 8, amx_complex , AMX-COMPLEX instructions (starting from Granite Rapids) + 0x7, 1, edx, 14, prefetchit_0_1 , PREFETCHIT0/1 instructions + 0x7, 1, edx, 18, cet_sss , CET supervisor shadow stacks safe to use + 0x7, 2, edx, 0, intel_psfd , Intel predictive store forward disable + 0x7, 2, edx, 1, ipred_ctrl , MSR bits IA32_SPEC_CTRL.IPRED_DIS_{U,S} + 0x7, 2, edx, 2, rrsba_ctrl , MSR bits IA32_SPEC_CTRL.RRSBA_DIS_{U,S} + 0x7, 2, edx, 3, ddp_ctrl , MSR bit IA32_SPEC_CTRL.DDPD_U + 0x7, 2, edx, 4, bhi_ctrl , MSR bit IA32_SPEC_CTRL.BHI_DIS_S + 0x7, 2, edx, 5, mcdt_no , MCDT mitigation not needed + 0x7, 2, edx, 6, uclock_disable , UC-lock disable is supported # Leaf 9H # Intel DCA (Direct Cache Access) enumeration - 9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS + 0x9, 0, eax, 0, dca_enabled_in_bios , DCA is enabled in BIOS # Leaf AH # Intel PMU (Performance Monitoring Unit) enumeration @@ -623,7 +632,7 @@ 0x40000000, 0, edx, 31:0, hypervisor_id_2 , Hypervisor ID string bytes 8 - 11 # Leaf 80000000H -# Maximum extended leaf number + CPU vendor string (AMD) +# Maximum extended leaf number + AMD/Transmeta CPU vendor string 0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported 0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3 @@ -636,6 +645,7 @@ 0x80000001, 0, eax, 3:0, e_stepping_id , Stepping ID 0x80000001, 0, eax, 7:4, e_base_model , Base processor model 0x80000001, 0, eax, 11:8, e_base_family , Base processor family +0x80000001, 0, eax, 13:12, e_base_type , Base processor type (Transmeta) 0x80000001, 0, eax, 19:16, e_ext_model , Extended processor model 0x80000001, 0, eax, 27:20, e_ext_family , Extended processor family 0x80000001, 0, ebx, 15:0, brand_id , Brand ID @@ -687,6 +697,7 @@ 0x80000001, 0, edx, 19, mp , Out-of-spec AMD Multiprocessing bit 0x80000001, 0, edx, 20, nx , No-execute page protection 0x80000001, 0, edx, 22, mmxext , AMD MMX extensions +0x80000001, 0, edx, 23, e_mmx , MMX instructions (Transmeta) 0x80000001, 0, edx, 24, e_fxsr , FXSAVE and FXRSTOR instructions 0x80000001, 0, edx, 25, fxsr_opt , FXSAVE and FXRSTOR optimizations 0x80000001, 0, edx, 26, pdpe1gb , 1-GB large page support @@ -720,7 +731,7 @@ 0x80000004, 0, edx, 31:0, cpu_brandid_11 , CPU brand ID string, bytes 44 - 47 # Leaf 80000005H -# AMD L1 cache and L1 TLB enumeration +# AMD/Transmeta L1 cache and L1 TLB enumeration 0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages 0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages @@ -1051,3 +1062,108 @@ 0x80000026, 3:0, ecx, 7:0, domain_level , This domain level (subleaf ID) 0x80000026, 3:0, ecx, 15:8, domain_type , This domain type 0x80000026, 3:0, edx, 31:0, x2apic_id , x2APIC ID of current logical CPU + +# Leaf 80860000H +# Maximum Transmeta leaf number + CPU vendor ID string + +0x80860000, 0, eax, 31:0, max_tra_leaf , Maximum supported Transmeta leaf number +0x80860000, 0, ebx, 31:0, cpu_vendorid_0 , Transmeta Vendor ID string bytes 0 - 3 +0x80860000, 0, ecx, 31:0, cpu_vendorid_2 , Transmeta Vendor ID string bytes 8 - 11 +0x80860000, 0, edx, 31:0, cpu_vendorid_1 , Transmeta Vendor ID string bytes 4 - 7 + +# Leaf 80860001H +# Transmeta extended CPU information + +0x80860001, 0, eax, 3:0, stepping , Stepping ID +0x80860001, 0, eax, 7:4, base_model , Base CPU model ID +0x80860001, 0, eax, 11:8, base_family_id , Base CPU family ID +0x80860001, 0, eax, 13:12, cpu_type , CPU type +0x80860001, 0, ebx, 7:0, cpu_rev_mask_minor , CPU revision ID, mask minor +0x80860001, 0, ebx, 15:8, cpu_rev_mask_major , CPU revision ID, mask major +0x80860001, 0, ebx, 23:16, cpu_rev_minor , CPU revision ID, minor +0x80860001, 0, ebx, 31:24, cpu_rev_major , CPU revision ID, major +0x80860001, 0, ecx, 31:0, cpu_base_mhz , CPU nominal frequency, in MHz +0x80860001, 0, edx, 0, recovery , Recovery CMS is active (after bad flush) +0x80860001, 0, edx, 1, longrun , LongRun power management capabilities +0x80860001, 0, edx, 3, lrti , LongRun Table Interface + +# Leaf 80860002H +# Transmeta Code Morphing Software (CMS) enumeration + +0x80860002, 0, eax, 31:0, cpu_rev_id , CPU revision ID +0x80860002, 0, ebx, 7:0, cms_rev_mask_2 , CMS revision ID, mask component 2 +0x80860002, 0, ebx, 15:8, cms_rev_mask_1 , CMS revision ID, mask component 1 +0x80860002, 0, ebx, 23:16, cms_rev_minor , CMS revision ID, minor +0x80860002, 0, ebx, 31:24, cms_rev_major , CMS revision ID, major +0x80860002, 0, ecx, 31:0, cms_rev_mask_3 , CMS revision ID, mask component 3 + +# Leaf 80860003H +# Transmeta CPU information string, bytes 0 - 15 + +0x80860003, 0, eax, 31:0, cpu_info_0 , CPU info string bytes 0 - 3 +0x80860003, 0, ebx, 31:0, cpu_info_1 , CPU info string bytes 4 - 7 +0x80860003, 0, ecx, 31:0, cpu_info_2 , CPU info string bytes 8 - 11 +0x80860003, 0, edx, 31:0, cpu_info_3 , CPU info string bytes 12 - 15 + +# Leaf 80860004H +# Transmeta CPU information string, bytes 16 - 31 + +0x80860004, 0, eax, 31:0, cpu_info_4 , CPU info string bytes 16 - 19 +0x80860004, 0, ebx, 31:0, cpu_info_5 , CPU info string bytes 20 - 23 +0x80860004, 0, ecx, 31:0, cpu_info_6 , CPU info string bytes 24 - 27 +0x80860004, 0, edx, 31:0, cpu_info_7 , CPU info string bytes 28 - 31 + +# Leaf 80860005H +# Transmeta CPU information string, bytes 32 - 47 + +0x80860005, 0, eax, 31:0, cpu_info_8 , CPU info string bytes 32 - 35 +0x80860005, 0, ebx, 31:0, cpu_info_9 , CPU info string bytes 36 - 39 +0x80860005, 0, ecx, 31:0, cpu_info_10 , CPU info string bytes 40 - 43 +0x80860005, 0, edx, 31:0, cpu_info_11 , CPU info string bytes 44 - 47 + +# Leaf 80860006H +# Transmeta CPU information string, bytes 48 - 63 + +0x80860006, 0, eax, 31:0, cpu_info_12 , CPU info string bytes 48 - 51 +0x80860006, 0, ebx, 31:0, cpu_info_13 , CPU info string bytes 52 - 55 +0x80860006, 0, ecx, 31:0, cpu_info_14 , CPU info string bytes 56 - 59 +0x80860006, 0, edx, 31:0, cpu_info_15 , CPU info string bytes 60 - 63 + +# Leaf 80860007H +# Transmeta live CPU information + +0x80860007, 0, eax, 31:0, cpu_cur_mhz , Current CPU frequency, in MHz +0x80860007, 0, ebx, 31:0, cpu_cur_voltage , Current CPU voltage, in millivolts +0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100d +0x80860007, 0, edx, 31:0, cpu_cur_gate_delay , Current CPU gate delay, in femtoseconds + +# Leaf C0000000H +# Maximum Centaur/Zhaoxin leaf number + +0xc0000000, 0, eax, 31:0, max_cntr_leaf , Maximum Centaur/Zhaoxin leaf number + +# Leaf C0000001H +# Centaur/Zhaoxin extended CPU features + +0xc0000001, 0, edx, 0, ccs_sm2 , CCS SM2 instructions +0xc0000001, 0, edx, 1, ccs_sm2_en , CCS SM2 enabled +0xc0000001, 0, edx, 2, xstore , Random Number Generator +0xc0000001, 0, edx, 3, xstore_en , RNG enabled +0xc0000001, 0, edx, 4, ccs_sm3_sm4 , CCS SM3 and SM4 instructions +0xc0000001, 0, edx, 5, ccs_sm3_sm4_en , CCS SM3/SM4 enabled +0xc0000001, 0, edx, 6, ace , Advanced Cryptography Engine +0xc0000001, 0, edx, 7, ace_en , ACE enabled +0xc0000001, 0, edx, 8, ace2 , Advanced Cryptography Engine v2 +0xc0000001, 0, edx, 9, ace2_en , ACE v2 enabled +0xc0000001, 0, edx, 10, phe , PadLock Hash Engine +0xc0000001, 0, edx, 11, phe_en , PHE enabled +0xc0000001, 0, edx, 12, pmm , PadLock Montgomery Multiplier +0xc0000001, 0, edx, 13, pmm_en , PMM enabled +0xc0000001, 0, edx, 16, parallax , Parallax auto adjust processor voltage +0xc0000001, 0, edx, 17, parallax_en , Parallax enabled +0xc0000001, 0, edx, 20, tm3 , Thermal Monitor v3 +0xc0000001, 0, edx, 21, tm3_en , TM v3 enabled +0xc0000001, 0, edx, 25, phe2 , PadLock Hash Engine v2 (SHA384/SHA512) +0xc0000001, 0, edx, 26, phe2_en , PHE v2 enabled +0xc0000001, 0, edx, 27, rsa , RSA instructions (XMODEXP/MONTMUL2) +0xc0000001, 0, edx, 28, rsa_en , RSA instructions enabled From f5e7fd6857963e697600d881242bbb39b0b9ac37 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:38 +0100 Subject: [PATCH 017/101] tools/x86/kcpuid: Update bitfields to x86-cpuid-db v2.1 Update kcpuid's CSV file to version 2.1, as generated by x86-cpuid-db. Summary of the v2.1 changes: * Use a standardized style for all x86 trademarks, registers, opcodes, byte units, hexadecimal digits, and x86 technical terms. This was enforced by a number of x86-specific hunspell(5) dictionary and affix files at the x86-cpuid-db project's CI pipeline. * Expand abbreviated terms that might be OK in code but not in official listings (e.g., "addr", "instr", "reg", "virt", etc.) * Add new Zen5 SoC bits to leaf 0x80000020 and leaf 0x80000021. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://gitlab.com/x86-cpuid.org/x86-cpuid-db/-/blob/v2.1/CHANGELOG.rst Link: https://lore.kernel.org/r/20250324142042.29010-18-darwi@linutronix.de --- tools/arch/x86/kcpuid/cpuid.csv | 171 +++++++++++++++++--------------- 1 file changed, 91 insertions(+), 80 deletions(-) diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index d0f7159f99ba..0f9c11208674 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -1,5 +1,5 @@ # SPDX-License-Identifier: CC0-1.0 -# Generator: x86-cpuid-db v2.0 +# Generator: x86-cpuid-db v2.1 # # Auto-generated file. @@ -28,7 +28,7 @@ 0x1, 0, eax, 27:20, ext_family , Extended CPU family ID 0x1, 0, ebx, 7:0, brand_id , Brand index 0x1, 0, ebx, 15:8, clflush_size , CLFLUSH instruction cache line size - 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU (HW threads) count + 0x1, 0, ebx, 23:16, n_logical_cpu , Logical CPU count 0x1, 0, ebx, 31:24, local_apic_id , Initial local APIC physical ID 0x1, 0, ecx, 0, pni , Streaming SIMD Extensions 3 (SSE3) 0x1, 0, ecx, 1, pclmulqdq , PCLMULQDQ instruction support @@ -41,7 +41,7 @@ 0x1, 0, ecx, 8, tm2 , Thermal Monitor 2 0x1, 0, ecx, 9, ssse3 , Supplemental SSE3 0x1, 0, ecx, 10, cid , L1 Context ID - 0x1, 0, ecx, 11, sdbg , Sillicon Debug + 0x1, 0, ecx, 11, sdbg , Silicon Debug 0x1, 0, ecx, 12, fma , FMA extensions using YMM state 0x1, 0, ecx, 13, cx16 , CMPXCHG16B instruction support 0x1, 0, ecx, 14, xtpr , xTPR Update Control @@ -89,13 +89,13 @@ 0x1, 0, edx, 27, ss , Self Snoop 0x1, 0, edx, 28, ht , Hyper-threading 0x1, 0, edx, 29, tm , Thermal Monitor - 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now resreved + 0x1, 0, edx, 30, ia64 , Legacy IA-64 (Itanium) support bit, now reserved 0x1, 0, edx, 31, pbe , Pending Break Enable # Leaf 2H # Intel cache and TLB information one-byte descriptors - 0x2, 0, eax, 7:0, iteration_count , Number of times this CPUD leaf must be queried + 0x2, 0, eax, 7:0, iteration_count , Number of times this leaf must be queried 0x2, 0, eax, 15:8, desc1 , Descriptor #1 0x2, 0, eax, 23:16, desc2 , Descriptor #2 0x2, 0, eax, 30:24, desc3 , Descriptor #3 @@ -129,7 +129,7 @@ 0x4, 31:0, eax, 4:0, cache_type , Cache type field 0x4, 31:0, eax, 7:5, cache_level , Cache level (1-based) - 0x4, 31:0, eax, 8, cache_self_init , Self-initialializing cache level + 0x4, 31:0, eax, 8, cache_self_init , Self-initializing cache level 0x4, 31:0, eax, 9, fully_associative , Fully-associative cache 0x4, 31:0, eax, 25:14, num_threads_sharing , Number logical CPUs sharing this cache 0x4, 31:0, eax, 31:26, num_cores_on_die , Number of cores in the physical package @@ -160,7 +160,7 @@ # Leaf 6H # Thermal and Power Management enumeration - 0x6, 0, eax, 0, dtherm , Digital temprature sensor + 0x6, 0, eax, 0, dtherm , Digital temperature sensor 0x6, 0, eax, 1, turbo_boost , Intel Turbo Boost 0x6, 0, eax, 2, arat , Always-Running APIC Timer (not affected by p-state) 0x6, 0, eax, 4, pln , Power Limit Notification (PLN) event @@ -187,8 +187,8 @@ 0x6, 0, ecx, 15:8, thrd_director_nclasses , Number of classes, Intel thread director 0x6, 0, edx, 0, perfcap_reporting , Performance capability reporting 0x6, 0, edx, 1, encap_reporting , Energy efficiency capability reporting - 0x6, 0, edx, 11:8, feedback_sz , HW feedback interface struct size, in 4K pages - 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU index @ HW feedback struct, 0-based + 0x6, 0, edx, 11:8, feedback_sz , Feedback interface structure size, in 4K pages + 0x6, 0, edx, 31:16, this_lcpu_hwfdbk_idx , This logical CPU hardware feedback interface index # Leaf 7H # Extended CPU features enumeration @@ -209,7 +209,7 @@ 0x7, 0, ebx, 12, cqm , Intel RDT-CMT / AMD Platform-QoS cache monitoring 0x7, 0, ebx, 13, zero_fcs_fds , Deprecated FPU CS/DS (stored as zero) 0x7, 0, ebx, 14, mpx , Intel memory protection extensions - 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcemeent + 0x7, 0, ebx, 15, rdt_a , Intel RDT / AMD Platform-QoS Enforcement 0x7, 0, ebx, 16, avx512f , AVX-512 foundation instructions 0x7, 0, ebx, 17, avx512dq , AVX-512 double/quadword instructions 0x7, 0, ebx, 18, rdseed , RDSEED instruction @@ -220,27 +220,27 @@ 0x7, 0, ebx, 24, clwb , CLWB instruction 0x7, 0, ebx, 25, intel_pt , Intel processor trace 0x7, 0, ebx, 26, avx512pf , AVX-512 prefetch instructions - 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instrs - 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instrs + 0x7, 0, ebx, 27, avx512er , AVX-512 exponent/reciprocal instructions + 0x7, 0, ebx, 28, avx512cd , AVX-512 conflict detection instructions 0x7, 0, ebx, 29, sha_ni , SHA/SHA256 instructions - 0x7, 0, ebx, 30, avx512bw , AVX-512 BW (byte/word granular) instructions + 0x7, 0, ebx, 30, avx512bw , AVX-512 byte/word instructions 0x7, 0, ebx, 31, avx512vl , AVX-512 VL (128/256 vector length) extensions 0x7, 0, ecx, 0, prefetchwt1 , PREFETCHWT1 (Intel Xeon Phi only) - 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instrs + 0x7, 0, ecx, 1, avx512vbmi , AVX-512 Vector byte manipulation instructions 0x7, 0, ecx, 2, umip , User mode instruction protection 0x7, 0, ecx, 3, pku , Protection keys for user-space 0x7, 0, ecx, 4, ospke , OS protection keys enable 0x7, 0, ecx, 5, waitpkg , WAITPKG instructions - 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instrs group 2 + 0x7, 0, ecx, 6, avx512_vbmi2 , AVX-512 vector byte manipulation instructions group 2 0x7, 0, ecx, 7, cet_ss , CET shadow stack features 0x7, 0, ecx, 8, gfni , Galois field new instructions - 0x7, 0, ecx, 9, vaes , Vector AES instrs + 0x7, 0, ecx, 9, vaes , Vector AES instructions 0x7, 0, ecx, 10, vpclmulqdq , VPCLMULQDQ 256-bit instruction support 0x7, 0, ecx, 11, avx512_vnni , Vector neural network instructions - 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bit count/shiffle + 0x7, 0, ecx, 12, avx512_bitalg , AVX-512 bitwise algorithms 0x7, 0, ecx, 13, tme , Intel total memory encryption - 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DW/QW - 0x7, 0, ecx, 16, la57 , 57-bit linear addreses (five-level paging) + 0x7, 0, ecx, 14, avx512_vpopcntdq , AVX-512: POPCNT for vectors of DWORD/QWORD + 0x7, 0, ecx, 16, la57 , 57-bit linear addresses (five-level paging) 0x7, 0, ecx, 21:17, mawau_val_lm , BNDLDX/BNDSTX MAWAU value in 64-bit mode 0x7, 0, ecx, 22, rdpid , RDPID instruction 0x7, 0, ecx, 23, key_locker , Intel key locker support @@ -278,7 +278,7 @@ 0x7, 0, edx, 30, core_capabilities , IA32_CORE_CAPABILITIES MSR 0x7, 0, edx, 31, spec_ctrl_ssbd , Speculative store bypass disable 0x7, 1, eax, 4, avx_vnni , AVX-VNNI instructions - 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bFloat16 instructions + 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bfloat16 instructions 0x7, 1, eax, 6, lass , Linear address space separation 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported @@ -287,7 +287,7 @@ 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB 0x7, 1, eax, 17, fred , FRED: Flexible return and event delivery transitions 0x7, 1, eax, 18, lkgs , LKGS: Load 'kernel' (userspace) GS - 0x7, 1, eax, 19, wrmsrns , WRMSRNS instr (WRMSR-non-serializing) + 0x7, 1, eax, 19, wrmsrns , WRMSRNS instruction (WRMSR-non-serializing) 0x7, 1, eax, 20, nmi_src , NMI-source reporting with FRED event data 0x7, 1, eax, 21, amx_fp16 , AMX-FP16: FP16 tile operations 0x7, 1, eax, 22, hreset , History reset support @@ -348,18 +348,18 @@ 0xd, 0, eax, 0, xcr0_x87 , XCR0.X87 (bit 0) supported 0xd, 0, eax, 1, xcr0_sse , XCR0.SEE (bit 1) supported 0xd, 0, eax, 2, xcr0_avx , XCR0.AVX (bit 2) supported - 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) - 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) - 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) - 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) - 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) - 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU reg) - 0xd, 0, eax, 11, xcr0_cet_u , AMD XCR0.CET_U (bit 11) supported (CET supervisor state) - 0xd, 0, eax, 12, xcr0_cet_s , AMD XCR0.CET_S (bit 12) support (CET user state) + 0xd, 0, eax, 3, xcr0_mpx_bndregs , XCR0.BNDREGS (bit 3) supported (MPX BND0-BND3 registers) + 0xd, 0, eax, 4, xcr0_mpx_bndcsr , XCR0.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers) + 0xd, 0, eax, 5, xcr0_avx512_opmask , XCR0.OPMASK (bit 5) supported (AVX-512 k0-k7 registers) + 0xd, 0, eax, 6, xcr0_avx512_zmm_hi256 , XCR0.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers) + 0xd, 0, eax, 7, xcr0_avx512_hi16_zmm , XCR0.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers) + 0xd, 0, eax, 9, xcr0_pkru , XCR0.PKRU (bit 9) supported (XSAVE PKRU registers) + 0xd, 0, eax, 11, xcr0_cet_u , XCR0.CET_U (bit 11) supported (CET user state) + 0xd, 0, eax, 12, xcr0_cet_s , XCR0.CET_S (bit 12) supported (CET supervisor state) 0xd, 0, eax, 17, xcr0_tileconfig , XCR0.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) 0xd, 0, eax, 18, xcr0_tiledata , XCR0.TILEDATA (bit 18) supported (AMX can manage TILEDATA) - 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTR area byte size, for XCR0 enabled features - 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTR area max byte size, all CPU features + 0xd, 0, ebx, 31:0, xsave_sz_xcr0_enabled , XSAVE/XRSTOR area byte size, for XCR0 enabled features + 0xd, 0, ecx, 31:0, xsave_sz_max , XSAVE/XRSTOR area max byte size, all CPU features 0xd, 0, edx, 30, xcr0_lwp , AMD XCR0.LWP (bit 62) supported (Light-weight Profiling) 0xd, 1, eax, 0, xsaveopt , XSAVEOPT instruction 0xd, 1, eax, 1, xsavec , XSAVEC instruction @@ -378,7 +378,7 @@ 0xd, 63:2, eax, 31:0, xsave_sz , Size of save area for subleaf-N feature, in bytes 0xd, 63:2, ebx, 31:0, xsave_offset , Offset of save area for subleaf-N feature, in bytes 0xd, 63:2, ecx, 0, is_xss_bit , Subleaf N describes an XSS bit, otherwise XCR0 bit - 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature xsave area is 64-byte aligned + 0xd, 63:2, ecx, 1, compacted_xsave_64byte_aligned, When compacted, subleaf-N feature XSAVE area is 64-byte aligned # Leaf FH # Intel RDT / AMD PQoS resource monitoring @@ -435,17 +435,17 @@ 0x12, 1, ecx, 0, xfrm_x87 , Enclave XFRM.X87 (bit 0) supported 0x12, 1, ecx, 1, xfrm_sse , Enclave XFRM.SEE (bit 1) supported 0x12, 1, ecx, 2, xfrm_avx , Enclave XFRM.AVX (bit 2) supported - 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 regs) - 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS regs) - 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 regs) - 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 regs) - 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 regs) - 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU reg) + 0x12, 1, ecx, 3, xfrm_mpx_bndregs , Enclave XFRM.BNDREGS (bit 3) supported (MPX BND0-BND3 registers) + 0x12, 1, ecx, 4, xfrm_mpx_bndcsr , Enclave XFRM.BNDCSR (bit 4) supported (MPX BNDCFGU/BNDSTATUS registers) + 0x12, 1, ecx, 5, xfrm_avx512_opmask , Enclave XFRM.OPMASK (bit 5) supported (AVX-512 k0-k7 registers) + 0x12, 1, ecx, 6, xfrm_avx512_zmm_hi256 , Enclave XFRM.ZMM_Hi256 (bit 6) supported (AVX-512 ZMM0->ZMM7/15 registers) + 0x12, 1, ecx, 7, xfrm_avx512_hi16_zmm , Enclave XFRM.HI16_ZMM (bit 7) supported (AVX-512 ZMM16->ZMM31 registers) + 0x12, 1, ecx, 9, xfrm_pkru , Enclave XFRM.PKRU (bit 9) supported (XSAVE PKRU registers) 0x12, 1, ecx, 17, xfrm_tileconfig , Enclave XFRM.TILECONFIG (bit 17) supported (AMX can manage TILECONFIG) 0x12, 1, ecx, 18, xfrm_tiledata , Enclave XFRM.TILEDATA (bit 18) supported (AMX can manage TILEDATA) 0x12, 31:2, eax, 3:0, subleaf_type , Subleaf type (dictates output layout) - 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base addr, bits[12:31] - 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base addr, bits[32:51] + 0x12, 31:2, eax, 31:12, epc_sec_base_addr_0 , EPC section base address, bits[12:31] + 0x12, 31:2, ebx, 19:0, epc_sec_base_addr_1 , EPC section base address, bits[32:51] 0x12, 31:2, ecx, 3:0, epc_sec_type , EPC section type / property encoding 0x12, 31:2, ecx, 31:12, epc_sec_size_0 , EPC section size, bits[12:31] 0x12, 31:2, edx, 19:0, epc_sec_size_1 , EPC section size, bits[32:51] @@ -481,7 +481,7 @@ 0x15, 0, ecx, 31:0, cpu_crystal_hz , Core crystal clock nominal frequency, in Hz # Leaf 16H -# Intel processor fequency enumeration +# Intel processor frequency enumeration 0x16, 0, eax, 15:0, cpu_base_mhz , Processor base frequency, in MHz 0x16, 0, ebx, 15:0, cpu_max_mhz , Processor max frequency, in MHz @@ -492,7 +492,7 @@ 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf 0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID - 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumaeratoion scheme (not Intel) + 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel) 0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor 0x17, 0, edx, 31:0, soc_stepping_id , Soc project stepping ID, assigned by vendor 0x17, 3:1, eax, 31:0, vendor_brand_a , Vendor Brand ID string, bytes subleaf_nr * (0 -> 3) @@ -508,13 +508,13 @@ 0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported 0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported 0x18, 31:0, ebx, 3, tlb_1g_page , TLB 1GB-page entries supported - 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this struct + 0x18, 31:0, ebx, 10:8, hard_partitioning , (Hard/Soft) partitioning between logical CPUs sharing this structure 0x18, 31:0, ebx, 31:16, n_way_associative , Ways of associativity 0x18, 31:0, ecx, 31:0, n_sets , Number of sets 0x18, 31:0, edx, 4:0, tlb_type , Translation cache type (TLB type) 0x18, 31:0, edx, 7:5, tlb_cache_level , Translation cache level (1-based) 0x18, 31:0, edx, 8, is_fully_associative , Fully-associative structure - 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max num of addressible IDs for logical CPUs sharing this TLB - 1 + 0x18, 31:0, edx, 25:14, tlb_max_addressible_ids, Max number of addressable IDs for logical CPUs sharing this TLB - 1 # Leaf 19H # Intel Key Locker enumeration @@ -577,7 +577,7 @@ # Intel AMX, TMUL (Tile-matrix MULtiply) accelerator unit enumeration 0x1e, 0, ebx, 7:0, tmul_maxk , TMUL unit maximum height, K (rows or columns) - 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maxiumum SIMD dimension, N (column bytes) + 0x1e, 0, ebx, 23:8, tmul_maxn , TMUL unit maximum SIMD dimension, N (column bytes) # Leaf 1FH # Intel extended topology enumeration v2 @@ -733,9 +733,9 @@ # Leaf 80000005H # AMD/Transmeta L1 cache and L1 TLB enumeration -0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entires, 2M and 4M pages +0x80000005, 0, eax, 7:0, l1_itlb_2m_4m_nentries , L1 ITLB #entries, 2M and 4M pages 0x80000005, 0, eax, 15:8, l1_itlb_2m_4m_assoc , L1 ITLB associativity, 2M and 4M pages -0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entires, 2M and 4M pages +0x80000005, 0, eax, 23:16, l1_dtlb_2m_4m_nentries , L1 DTLB #entries, 2M and 4M pages 0x80000005, 0, eax, 31:24, l1_dtlb_2m_4m_assoc , L1 DTLB associativity, 2M and 4M pages 0x80000005, 0, ebx, 7:0, l1_itlb_4k_nentries , L1 ITLB #entries, 4K pages 0x80000005, 0, ebx, 15:8, l1_itlb_4k_assoc , L1 ITLB associativity, 4K pages @@ -774,11 +774,11 @@ # CPU power management (mostly AMD) and AMD RAS enumeration 0x80000007, 0, ebx, 0, overflow_recov , MCA overflow conditions not fatal -0x80000007, 0, ebx, 1, succor , Software containment of UnCORRectable errors +0x80000007, 0, ebx, 1, succor , Software containment of uncorrectable errors 0x80000007, 0, ebx, 2, hw_assert , Hardware assert MSRs 0x80000007, 0, ebx, 3, smca , Scalable MCA (MCAX MSRs) 0x80000007, 0, ecx, 31:0, cpu_pwr_sample_ratio , CPU power sample time ratio -0x80000007, 0, edx, 0, digital_temp , Digital temprature sensor +0x80000007, 0, edx, 0, digital_temp , Digital temperature sensor 0x80000007, 0, edx, 1, powernow_freq_id , PowerNOW! frequency scaling 0x80000007, 0, edx, 2, powernow_volt_id , PowerNOW! voltage scaling 0x80000007, 0, edx, 3, thermal_trip , THERMTRIP (Thermal Trip) @@ -821,7 +821,7 @@ 0x80000008, 0, ebx, 23, amd_ppin , Protected Processor Inventory Number 0x80000008, 0, ebx, 24, amd_ssbd , Speculative Store Bypass Disable 0x80000008, 0, ebx, 25, virt_ssbd , virtualized SSBD (Speculative Store Bypass Disable) -0x80000008, 0, ebx, 26, amd_ssb_no , SSBD not needed (fixed in HW) +0x80000008, 0, ebx, 26, amd_ssb_no , SSBD is not needed (fixed in hardware) 0x80000008, 0, ebx, 27, cppc , Collaborative Processor Performance Control 0x80000008, 0, ebx, 28, amd_psfd , Predictive Store Forward Disable 0x80000008, 0, ebx, 29, btc_no , CPU not affected by Branch Type Confusion @@ -849,7 +849,7 @@ 0x8000000a, 0, edx, 10, pausefilter , Pause intercept filter 0x8000000a, 0, edx, 12, pfthreshold , Pause filter threshold 0x8000000a, 0, edx, 13, avic , Advanced virtual interrupt controller -0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virt) +0x8000000a, 0, edx, 15, v_vmsave_vmload , Virtual VMSAVE/VMLOAD (nested virtualization) 0x8000000a, 0, edx, 16, vgif , Virtualize the Global Interrupt Flag 0x8000000a, 0, edx, 17, gmet , Guest mode execution trap 0x8000000a, 0, edx, 18, x2avic , Virtual x2APIC @@ -861,7 +861,7 @@ 0x8000000a, 0, edx, 25, vnmi , NMI virtualization 0x8000000a, 0, edx, 26, ibs_virt , IBS Virtualization 0x8000000a, 0, edx, 27, ext_lvt_off_chg , Extended LVT offset fault change -0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME addr check +0x8000000a, 0, edx, 28, svme_addr_chk , Guest SVME address check # Leaf 80000019H # AMD TLB 1G-pages enumeration @@ -902,20 +902,20 @@ # AMD LWP (Lightweight Profiling) 0x8000001c, 0, eax, 0, os_lwp_avail , LWP is available to application programs (supported by OS) -0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction (EventId=1) is supported by OS -0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event (EventId=2) is supported by OS -0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event (EventId=3) is supported by OS -0x8000001c, 0, eax, 4, os_lwp_dme , DCache Miss Event (EventId=4) is supported by OS -0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is supported by OS -0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is supported by OS +0x8000001c, 0, eax, 1, os_lpwval , LWPVAL instruction is supported by OS +0x8000001c, 0, eax, 2, os_lwp_ire , Instructions Retired Event is supported by OS +0x8000001c, 0, eax, 3, os_lwp_bre , Branch Retired Event is supported by OS +0x8000001c, 0, eax, 4, os_lwp_dme , Dcache Miss Event is supported by OS +0x8000001c, 0, eax, 5, os_lwp_cnh , CPU Clocks Not Halted event is supported by OS +0x8000001c, 0, eax, 6, os_lwp_rnh , CPU Reference clocks Not Halted event is supported by OS 0x8000001c, 0, eax, 29, os_lwp_cont , LWP sampling in continuous mode is supported by OS 0x8000001c, 0, eax, 30, os_lwp_ptsc , Performance Time Stamp Counter in event records is supported by OS 0x8000001c, 0, eax, 31, os_lwp_int , Interrupt on threshold overflow is supported by OS 0x8000001c, 0, ebx, 7:0, lwp_lwpcb_sz , LWP Control Block size, in quadwords 0x8000001c, 0, ebx, 15:8, lwp_event_sz , LWP event record size, in bytes -0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventId value (EventID 255 not included) +0x8000001c, 0, ebx, 23:16, lwp_max_events , LWP max supported EventID value (EventID 255 not included) 0x8000001c, 0, ebx, 31:24, lwp_event_offset , LWP events area offset in the LWP Control Block -0x8000001c, 0, ecx, 4:0, lwp_latency_max , Num of bits in cache latency counters (10 to 31) +0x8000001c, 0, ecx, 4:0, lwp_latency_max , Number of bits in cache latency counters (10 to 31) 0x8000001c, 0, ecx, 5, lwp_data_adddr , Cache miss events report the data address of the reference 0x8000001c, 0, ecx, 8:6, lwp_latency_rnd , Amount by which cache latency is rounded 0x8000001c, 0, ecx, 15:9, lwp_version , LWP implementation version @@ -924,16 +924,16 @@ 0x8000001c, 0, ecx, 29, lwp_ip_filtering , IP filtering (IPI, IPF, BaseIP, and LimitIP @ LWPCP) supported 0x8000001c, 0, ecx, 30, lwp_cache_levels , Cache-related events can be filtered by cache level 0x8000001c, 0, ecx, 31, lwp_cache_latency , Cache-related events can be filtered by latency -0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in Hardware -0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction (EventId=1) is available in HW -0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event (EventId=2) is available in HW -0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event (EventId=3) is available in HW -0x8000001c, 0, edx, 4, hw_lwp_dme , DCache Miss Event (EventId=4) is available in HW -0x8000001c, 0, edx, 5, hw_lwp_cnh , CPU Clocks Not Halted event (EventId=5) is available in HW -0x8000001c, 0, edx, 6, hw_lwp_rnh , CPU Reference clocks Not Halted event (EventId=6) is available in HW -0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in HW -0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in HW -0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in HW +0x8000001c, 0, edx, 0, hw_lwp_avail , LWP is available in hardware +0x8000001c, 0, edx, 1, hw_lpwval , LWPVAL instruction is available in hardware +0x8000001c, 0, edx, 2, hw_lwp_ire , Instructions Retired Event is available in hardware +0x8000001c, 0, edx, 3, hw_lwp_bre , Branch Retired Event is available in hardware +0x8000001c, 0, edx, 4, hw_lwp_dme , Dcache Miss Event is available in hardware +0x8000001c, 0, edx, 5, hw_lwp_cnh , Clocks Not Halted event is available in hardware +0x8000001c, 0, edx, 6, hw_lwp_rnh , Reference clocks Not Halted event is available in hardware +0x8000001c, 0, edx, 29, hw_lwp_cont , LWP sampling in continuous mode is available in hardware +0x8000001c, 0, edx, 30, hw_lwp_ptsc , Performance Time Stamp Counter in event records is available in hardware +0x8000001c, 0, edx, 31, hw_lwp_int , Interrupt on threshold overflow is available in hardware # Leaf 8000001DH # AMD deterministic cache parameters @@ -969,10 +969,10 @@ 0x8000001f, 0, eax, 4, sev_nested_paging , SEV secure nested paging supported 0x8000001f, 0, eax, 5, vm_permission_levels , VMPL supported 0x8000001f, 0, eax, 6, rpmquery , RPMQUERY instruction supported -0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadwo stack supported +0x8000001f, 0, eax, 7, vmpl_sss , VMPL supervisor shadow stack supported 0x8000001f, 0, eax, 8, secure_tsc , Secure TSC supported 0x8000001f, 0, eax, 9, v_tsc_aux , Hardware virtualizes TSC_AUX -0x8000001f, 0, eax, 10, sme_coherent , HW enforces cache coherency across encryption domains +0x8000001f, 0, eax, 10, sme_coherent , Cache coherency is enforced across encryption domains 0x8000001f, 0, eax, 11, req_64bit_hypervisor , SEV guest mandates 64-bit hypervisor 0x8000001f, 0, eax, 12, restricted_injection , Restricted Injection supported 0x8000001f, 0, eax, 13, alternate_injection , Alternate Injection supported @@ -984,13 +984,13 @@ 0x8000001f, 0, eax, 19, virt_ibs , IBS state virtualization is supported for SEV-ES guests 0x8000001f, 0, eax, 24, vmsa_reg_protection , VMSA register protection is supported 0x8000001f, 0, eax, 25, smt_protection , SMT protection is supported -0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000h) is supported +0x8000001f, 0, eax, 28, svsm_page_msr , SVSM communication page MSR (0xc001f000) is supported 0x8000001f, 0, eax, 29, nested_virt_snp_msr , VIRT_RMPUPDATE/VIRT_PSMASH MSRs are supported 0x8000001f, 0, ebx, 5:0, pte_cbit_pos , PTE bit number used to enable memory encryption 0x8000001f, 0, ebx, 11:6, phys_addr_reduction_nbits, Reduction of phys address space when encryption is enabled, in bits 0x8000001f, 0, ebx, 15:12, vmpl_count , Number of VM permission levels (VMPL) supported 0x8000001f, 0, ecx, 31:0, enc_guests_max , Max supported number of simultaneous encrypted guests -0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Mininum ASID for SEV-enabled SEV-ES-disabled guest +0x8000001f, 0, edx, 31:0, min_sev_asid_no_sev_es , Minimum ASID for SEV-enabled SEV-ES-disabled guest # Leaf 80000020H # AMD Platform QoS extended feature IDs @@ -999,6 +999,8 @@ 0x80000020, 0, ebx, 2, smba , Slow Memory Bandwidth Allocation support 0x80000020, 0, ebx, 3, bmec , Bandwidth Monitoring Event Configuration support 0x80000020, 0, ebx, 4, l3rr , L3 Range Reservation support +0x80000020, 0, ebx, 5, abmc , Assignable Bandwidth Monitoring Counters +0x80000020, 0, ebx, 6, sdciae , Smart Data Cache Injection (SDCI) Allocation Enforcement 0x80000020, 1, eax, 31:0, mba_limit_len , MBA enforcement limit size 0x80000020, 1, edx, 31:0, mba_cos_max , MBA max Class of Service number (zero-based) 0x80000020, 2, eax, 31:0, smba_limit_len , SMBA enforcement limit size @@ -1023,12 +1025,21 @@ 0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported 0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported 0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present -0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep Stosb (FSRS) is supported -0x80000021, 0, eax, 11, fsrc_supported , Fast Short Repe Cmpsb (FSRC) is supported +0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep STOSB (FSRS) is supported +0x80000021, 0, eax, 11, fsrc_supported , Fast Short Rep CMPSB (FSRC) is supported 0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported +0x80000021, 0, eax, 16, opcode_reclaim , Reserves opcode space 0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported 0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported -0x80000021, 0, ebx, 11:0, microcode_patch_size , Size of microcode patch, in 16-byte units +0x80000021, 0, eax, 22, wl_feedback , Workload-based heuristic feedback to OS +0x80000021, 0, eax, 24, eraps_support , Enhanced Return Address Predictor Security +0x80000021, 0, eax, 27, sbpb , Support for the Selective Branch Predictor Barrier +0x80000021, 0, eax, 28, ibpb_brtype , Branch predictions flushed from CPU branch predictor +0x80000021, 0, eax, 29, srso_no , CPU is not subject to the SRSO vulnerability +0x80000021, 0, eax, 30, srso_uk_no , CPU is not vulnerable to SRSO at user-kernel boundary +0x80000021, 0, eax, 31, srso_msr_fix , Software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO +0x80000021, 0, ebx, 15:0, microcode_patch_size , Size of microcode patch, in 16-byte units +0x80000021, 0, ebx, 23:16, rap_size , Return Address Predictor size # Leaf 80000022H # AMD Performance Monitoring v2 enumeration @@ -1036,7 +1047,7 @@ 0x80000022, 0, eax, 0, perfmon_v2 , Performance monitoring v2 supported 0x80000022, 0, eax, 1, lbr_v2 , Last Branch Record v2 extensions (LBR Stack) 0x80000022, 0, eax, 2, lbr_pmc_freeze , Freezing core performance counters / LBR Stack supported -0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core perfomance counters +0x80000022, 0, ebx, 3:0, n_pmc_core , Number of core performance counters 0x80000022, 0, ebx, 9:4, lbr_v2_stack_size , Number of available LBR stack entries 0x80000022, 0, ebx, 15:10, n_pmc_northbridge , Number of available northbridge (data fabric) performance counters 0x80000022, 0, ebx, 21:16, n_pmc_umc , Number of available UMC performance counters @@ -1046,7 +1057,7 @@ # AMD Secure Multi-key Encryption enumeration 0x80000023, 0, eax, 0, mem_hmk_mode , MEM-HMK encryption mode is supported -0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total num of available encryption keys +0x80000023, 0, ebx, 15:0, mem_hmk_avail_keys , MEM-HMK mode: total number of available encryption keys # Leaf 80000026H # AMD extended topology enumeration v2 @@ -1134,7 +1145,7 @@ 0x80860007, 0, eax, 31:0, cpu_cur_mhz , Current CPU frequency, in MHz 0x80860007, 0, ebx, 31:0, cpu_cur_voltage , Current CPU voltage, in millivolts -0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100d +0x80860007, 0, ecx, 31:0, cpu_cur_perf_pctg , Current CPU performance percentage, 0 - 100 0x80860007, 0, edx, 31:0, cpu_cur_gate_delay , Current CPU gate delay, in femtoseconds # Leaf C0000000H From 5e0c3c5e95f09236e5f7a73615bfa2666c58f182 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:39 +0100 Subject: [PATCH 018/101] tools/x86/kcpuid: Update bitfields to x86-cpuid-db v2.2 Update kcpuid's CSV file to version 2.2, as generated by x86-cpuid-db. Per Ingo Molnar's feedback, it is desired to always use CPUID in its capitalized form. The v2.2 release fixed all instances of small case "cpuid" at the project's XML database, and thus all of its generated files. Reported-by: Ingo Molnar Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://gitlab.com/x86-cpuid.org/x86-cpuid-db/-/blob/v2.2/CHANGELOG.rst Link: https://lore.kernel.org/r/20250324142042.29010-19-darwi@linutronix.de Closes: https://lkml.kernel.org/r/Z8bHK391zKE4gUEW@gmail.com --- tools/arch/x86/kcpuid/cpuid.csv | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index 0f9c11208674..9613e09cbfb3 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -1,5 +1,5 @@ # SPDX-License-Identifier: CC0-1.0 -# Generator: x86-cpuid-db v2.1 +# Generator: x86-cpuid-db v2.2 # # Auto-generated file. @@ -12,7 +12,7 @@ # Leaf 0H # Maximum standard leaf number + CPU vendor string - 0x0, 0, eax, 31:0, max_std_leaf , Highest cpuid standard leaf supported + 0x0, 0, eax, 31:0, max_std_leaf , Highest standard CPUID leaf supported 0x0, 0, ebx, 31:0, cpu_vendorid_0 , CPU vendor ID string bytes 0 - 3 0x0, 0, ecx, 31:0, cpu_vendorid_2 , CPU vendor ID string bytes 8 - 11 0x0, 0, edx, 31:0, cpu_vendorid_1 , CPU vendor ID string bytes 4 - 7 @@ -193,7 +193,7 @@ # Leaf 7H # Extended CPU features enumeration - 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of cpuid 0x7 subleaves + 0x7, 0, eax, 31:0, leaf7_n_subleaves , Number of leaf 0x7 subleaves 0x7, 0, ebx, 0, fsgsbase , FSBASE/GSBASE read/write support 0x7, 0, ebx, 1, tsc_adjust , IA32_TSC_ADJUST MSR supported 0x7, 0, ebx, 2, sgx , Intel SGX (Software Guard Extensions) @@ -281,7 +281,7 @@ 0x7, 1, eax, 5, avx512_bf16 , AVX-512 bfloat16 instructions 0x7, 1, eax, 6, lass , Linear address space separation 0x7, 1, eax, 7, cmpccxadd , CMPccXADD instructions - 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: CPUID leaf 0x23 is supported + 0x7, 1, eax, 8, arch_perfmon_ext , ArchPerfmonExt: leaf 0x23 is supported 0x7, 1, eax, 10, fzrm , Fast zero-length REP MOVSB 0x7, 1, eax, 11, fsrs , Fast short REP STOSB 0x7, 1, eax, 12, fsrc , Fast Short REP CMPSB/SCASB @@ -319,7 +319,7 @@ 0xa, 0, eax, 7:0, pmu_version , Performance monitoring unit version ID 0xa, 0, eax, 15:8, pmu_n_gcounters , Number of general PMU counters per logical CPU 0xa, 0, eax, 23:16, pmu_gcounters_nbits , Bitwidth of PMU general counters - 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of cpuid leaf 0xa EBX bit vector + 0xa, 0, eax, 31:24, pmu_cpuid_ebx_bits , Length of leaf 0xa EBX bit vector 0xa, 0, ebx, 0, no_core_cycle_evt , Core cycle event not available 0xa, 0, ebx, 1, no_insn_retired_evt , Instruction retired event not available 0xa, 0, ebx, 2, no_refcycle_evt , Reference cycles event not available @@ -453,7 +453,7 @@ # Leaf 14H # Intel Processor Trace enumeration - 0x14, 0, eax, 31:0, pt_max_subleaf , Max cpuid 0x14 subleaf + 0x14, 0, eax, 31:0, pt_max_subleaf , Maximum leaf 0x14 subleaf 0x14, 0, ebx, 0, cr3_filtering , IA32_RTIT_CR3_MATCH is accessible 0x14, 0, ebx, 1, psb_cyc , Configurable PSB and cycle-accurate mode 0x14, 0, ebx, 2, ip_filtering , IP/TraceStop filtering; Warm-reset PT MSRs preservation @@ -490,7 +490,7 @@ # Leaf 17H # Intel SoC vendor attributes enumeration - 0x17, 0, eax, 31:0, soc_max_subleaf , Max cpuid leaf 0x17 subleaf + 0x17, 0, eax, 31:0, soc_max_subleaf , Maximum leaf 0x17 subleaf 0x17, 0, ebx, 15:0, soc_vendor_id , SoC vendor ID 0x17, 0, ebx, 16, is_vendor_scheme , Assigned by industry enumeration scheme (not Intel) 0x17, 0, ecx, 31:0, soc_proj_id , SoC project ID, assigned by vendor @@ -503,7 +503,7 @@ # Leaf 18H # Intel determenestic address translation (TLB) parameters - 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Max cpuid 0x18 subleaf + 0x18, 31:0, eax, 31:0, tlb_max_subleaf , Maximum leaf 0x18 subleaf 0x18, 31:0, ebx, 0, tlb_4k_page , TLB 4KB-page entries supported 0x18, 31:0, ebx, 1, tlb_2m_page , TLB 2MB-page entries supported 0x18, 31:0, ebx, 2, tlb_4m_page , TLB 4MB-page entries supported @@ -634,7 +634,7 @@ # Leaf 80000000H # Maximum extended leaf number + AMD/Transmeta CPU vendor string -0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended cpuid leaf supported +0x80000000, 0, eax, 31:0, max_ext_leaf , Maximum extended CPUID leaf supported 0x80000000, 0, ebx, 31:0, cpu_vendorid_0 , Vendor ID string bytes 0 - 3 0x80000000, 0, ecx, 31:0, cpu_vendorid_2 , Vendor ID string bytes 8 - 11 0x80000000, 0, edx, 31:0, cpu_vendorid_1 , Vendor ID string bytes 4 - 7 @@ -669,7 +669,7 @@ 0x80000001, 0, ecx, 17, tce , Translation cache extension 0x80000001, 0, ecx, 19, nodeid_msr , NodeId MSR (0xc001100c) 0x80000001, 0, ecx, 21, tbm , Trailing bit manipulations -0x80000001, 0, ecx, 22, topoext , Topology Extensions (cpuid leaf 0x8000001d) +0x80000001, 0, ecx, 22, topoext , Topology Extensions (leaf 0x8000001d) 0x80000001, 0, ecx, 23, perfctr_core , Core performance counter extensions 0x80000001, 0, ecx, 24, perfctr_nb , NB/DF performance counter extensions 0x80000001, 0, ecx, 26, bpext , Data access breakpoint extension From 300ba891418ae72df8d16fc26886f1d049c70907 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:40 +0100 Subject: [PATCH 019/101] tools/x86/kcpuid: Update bitfields to x86-cpuid-db v2.3 Update kcpuid's CSV file to version 2.3, as generated by x86-cpuid-db. Summary of the v2.3 changes: * Per H. Peter Anvin's feedback, leaf 0x3 is not unique to Transmeta as the CSV file earlier claimed. Since leaf 0x3's format differs between Intel and Transmeta, and the project does not yet support having the same CPUID bitfield with varying interpretations across vendors, leaf 0x3 is removed for now. Given that Intel discontinued support for PSN from Pentium 4 onward, and Linux force disables it on early boot for privacy concerns, this should have minimal impact. * Leaf 0x80000021: Make bitfield IDs and descriptions coherent with each other. Remove "_support" from bitfield IDs, as no other leaf has such convention. Reported-by: "H. Peter Anvin" Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://gitlab.com/x86-cpuid.org/x86-cpuid-db/-/blob/v2.3/CHANGELOG.rst Link: https://lore.kernel.org/r/20250324142042.29010-20-darwi@linutronix.de Closes: https://lkml.kernel.org/r/C7684E03-36E0-4D58-B6F0-78F4DB82D737@zytor.com --- tools/arch/x86/kcpuid/cpuid.csv | 30 +++++++++++------------------- 1 file changed, 11 insertions(+), 19 deletions(-) diff --git a/tools/arch/x86/kcpuid/cpuid.csv b/tools/arch/x86/kcpuid/cpuid.csv index 9613e09cbfb3..8d25b0b49f3b 100644 --- a/tools/arch/x86/kcpuid/cpuid.csv +++ b/tools/arch/x86/kcpuid/cpuid.csv @@ -1,5 +1,5 @@ # SPDX-License-Identifier: CC0-1.0 -# Generator: x86-cpuid-db v2.2 +# Generator: x86-cpuid-db v2.3 # # Auto-generated file. @@ -116,14 +116,6 @@ 0x2, 0, edx, 30:24, desc15 , Descriptor #15 0x2, 0, edx, 31, edx_invalid , Descriptors 12-15 are invalid if set -# Leaf 3H -# Transmeta Processor Serial Number (PSN) - - 0x3, 0, eax, 31:0, cpu_psn_0 , Processor Serial Number bytes 0 - 3 - 0x3, 0, ebx, 31:0, cpu_psn_1 , Processor Serial Number bytes 4 - 7 - 0x3, 0, ecx, 31:0, cpu_psn_2 , Processor Serial Number bytes 8 - 11 - 0x3, 0, edx, 31:0, cpu_psn_3 , Processor Serial Number bytes 12 - 15 - # Leaf 4H # Intel deterministic cache parameters @@ -1020,20 +1012,20 @@ 0x80000021, 0, eax, 0, no_nested_data_bp , No nested data breakpoints 0x80000021, 0, eax, 1, fsgs_non_serializing , WRMSR to {FS,GS,KERNEL_GS}_BASE is non-serializing 0x80000021, 0, eax, 2, lfence_rdtsc , LFENCE always serializing / synchronizes RDTSC -0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock is supported +0x80000021, 0, eax, 3, smm_page_cfg_lock , SMM paging configuration lock 0x80000021, 0, eax, 6, null_sel_clr_base , Null selector clears base -0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore Enable bit supported -0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS enable bit supported -0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not present -0x80000021, 0, eax, 10, fsrs_supported , Fast Short Rep STOSB (FSRS) is supported -0x80000021, 0, eax, 11, fsrc_supported , Fast Short Rep CMPSB (FSRC) is supported -0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is supported +0x80000021, 0, eax, 7, upper_addr_ignore , EFER MSR Upper Address Ignore +0x80000021, 0, eax, 8, autoibrs , EFER MSR Automatic IBRS +0x80000021, 0, eax, 9, no_smm_ctl_msr , SMM_CTL MSR (0xc0010116) is not available +0x80000021, 0, eax, 10, fsrs , Fast Short Rep STOSB +0x80000021, 0, eax, 11, fsrc , Fast Short Rep CMPSB +0x80000021, 0, eax, 13, prefetch_ctl_msr , Prefetch control MSR is available 0x80000021, 0, eax, 16, opcode_reclaim , Reserves opcode space 0x80000021, 0, eax, 17, user_cpuid_disable , #GP when executing CPUID at CPL > 0 is supported -0x80000021, 0, eax, 18, epsf_supported , Enhanced Predictive Store Forwarding (EPSF) is supported +0x80000021, 0, eax, 18, epsf , Enhanced Predictive Store Forwarding 0x80000021, 0, eax, 22, wl_feedback , Workload-based heuristic feedback to OS -0x80000021, 0, eax, 24, eraps_support , Enhanced Return Address Predictor Security -0x80000021, 0, eax, 27, sbpb , Support for the Selective Branch Predictor Barrier +0x80000021, 0, eax, 24, eraps , Enhanced Return Address Predictor Security +0x80000021, 0, eax, 27, sbpb , Selective Branch Predictor Barrier 0x80000021, 0, eax, 28, ibpb_brtype , Branch predictions flushed from CPU branch predictor 0x80000021, 0, eax, 29, srso_no , CPU is not subject to the SRSO vulnerability 0x80000021, 0, eax, 30, srso_uk_no , CPU is not vulnerable to SRSO at user-kernel boundary From 0efb4dc3b084579efe7f5f88370b76d4f1c3bcee Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 15:20:41 +0100 Subject: [PATCH 020/101] MAINTAINERS: Include the entire kcpuid/ directory under the X86 CPUID DATABASE entry kcpuid's CSV file is covered by the "x86 CPUID database" MAINTAINERS entry. Recent patches have shown that changes to that file may require updates to the kcpuid code, so include the whole of tools/x86/kcpuid/ under the same entry. This also ensures that myself and the x86-cpuid mailing list are CCed on future kcpuid patches. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Josh Poimboeuf Link: https://lore.kernel.org/r/20250324142042.29010-21-darwi@linutronix.de --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 3b5fa8436987..47dbac5517bb 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -25727,7 +25727,7 @@ R: Ahmed S. Darwish L: x86-cpuid@lists.linux.dev S: Maintained W: https://x86-cpuid.org -F: tools/arch/x86/kcpuid/cpuid.csv +F: tools/arch/x86/kcpuid/ X86 ENTRY CODE M: Andy Lutomirski From b5969494c8d8a2384c0636707a538efad15d8660 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:32:56 +0100 Subject: [PATCH 021/101] x86/cpu: Remove CPUID leaf 0x2 parsing loop Leaf 0x2 output includes a "query count" byte where it was supposed to specify the number of repeated CPUID leaf 0x2 subleaf 0 queries needed to extract all of the CPU's cache and TLB descriptors. Per current Intel manuals, all CPUs supporting this leaf "will always" return an iteration count of 1. Remove the leaf 0x2 query loop and just query the hardware once. Note, as previously done in: aec28d852ed2 ("x86/cpuid: Standardize on u32 in ") standardize on using 'u32' and 'u8' types. Suggested-by: Ingo Molnar Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-2-darwi@linutronix.de --- arch/x86/kernel/cpu/intel.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 4cbb2e69bea1..0570d4d86006 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -6,6 +6,7 @@ #include #include #include +#include #ifdef CONFIG_X86_64 #include @@ -777,28 +778,27 @@ static void intel_tlb_lookup(const unsigned char desc) static void intel_detect_tlb(struct cpuinfo_x86 *c) { - int i, j, n; - unsigned int regs[4]; - unsigned char *desc = (unsigned char *)regs; + u32 regs[4]; + u8 *desc = (u8 *)regs; if (c->cpuid_level < 2) return; - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - for (i = 0 ; i < n ; i++) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + /* Intel CPUs must report an iteration count of 1 */ + if (desc[0] != 0x01) + return; - /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 4 ; j++) - if (regs[j] & (1 << 31)) - regs[j] = 0; - - /* Byte 0 is level count, not a descriptor */ - for (j = 1 ; j < 16 ; j++) - intel_tlb_lookup(desc[j]); + /* If a register's bit 31 is set, it is an unknown format */ + for (int i = 0; i < 4; i++) { + if (regs[i] & (1 << 31)) + regs[i] = 0; } + + /* Skip the first byte as it is not a descriptor */ + for (int i = 1; i < 16; i++) + intel_tlb_lookup(desc[i]); } static const struct cpu_dev intel_cpu_dev = { From 09a1da4beb310420b720994f7b6599e8d0bce3e1 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:32:57 +0100 Subject: [PATCH 022/101] x86/cacheinfo: Remove CPUID leaf 0x2 parsing loop Leaf 0x2 output includes a "query count" byte where it was supposed to specify the number of repeated CPUID leaf 0x2 subleaf 0 queries needed to extract all of the CPU's cache and TLB descriptors. Per current Intel manuals, all CPUs supporting this leaf "will always" return an iteration count of 1. Remove the leaf 0x2 query loop and just query the hardware once. Note, as previously done at commit aec28d852ed2 ("x86/cpuid: Standardize on u32 in "), standardize on using 'u32' and 'u8' types. Suggested-by: Ingo Molnar Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-3-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 69 ++++++++++++++++----------------- 1 file changed, 33 insertions(+), 36 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index b3a520959b51..36782fd017b3 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -42,7 +42,7 @@ static cpumask_var_t cpu_cacheinfo_mask; unsigned int memory_caching_control __ro_after_init; struct _cache_table { - unsigned char descriptor; + u8 descriptor; char cache_type; short size; }; @@ -783,50 +783,47 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) /* Don't use CPUID(2) if CPUID(4) is supported. */ if (!ci->num_leaves && c->cpuid_level > 1) { - /* supports eax=2 call */ - int j, n; - unsigned int regs[4]; - unsigned char *dp = (unsigned char *)regs; + u32 regs[4]; + u8 *desc = (u8 *)regs; - /* Number of times to iterate */ - n = cpuid_eax(2) & 0xFF; + cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - for (i = 0 ; i < n ; i++) { - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); + /* Intel CPUs must report an iteration count of 1 */ + if (desc[0] != 0x01) + return; - /* If bit 31 is set, this is an unknown format */ - for (j = 0 ; j < 4 ; j++) - if (regs[j] & (1 << 31)) - regs[j] = 0; + /* If a register's bit 31 is set, it is an unknown format */ + for (int i = 0; i < 4; i++) { + if (regs[i] & (1 << 31)) + regs[i] = 0; + } - /* Byte 0 is level count, not a descriptor */ - for (j = 1 ; j < 16 ; j++) { - unsigned char des = dp[j]; - unsigned char k = 0; - - /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) { - if (cache_table[k].descriptor == des) { - switch (cache_table[k].cache_type) { - case LVL_1_INST: - l1i += cache_table[k].size; - break; - case LVL_1_DATA: - l1d += cache_table[k].size; - break; - case LVL_2: - l2 += cache_table[k].size; - break; - case LVL_3: - l3 += cache_table[k].size; - break; - } + /* Skip the first byte as it is not a descriptor */ + for (int i = 1; i < 16; i++) { + u8 des = desc[i]; + u8 k = 0; + /* look up this descriptor in the table */ + while (cache_table[k].descriptor != 0) { + if (cache_table[k].descriptor == des) { + switch (cache_table[k].cache_type) { + case LVL_1_INST: + l1i += cache_table[k].size; + break; + case LVL_1_DATA: + l1d += cache_table[k].size; + break; + case LVL_2: + l2 += cache_table[k].size; + break; + case LVL_3: + l3 += cache_table[k].size; break; } - k++; + break; } + k++; } } } From fe78079ec07fd48a19abcfeac74bc97e07171fb6 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:32:58 +0100 Subject: [PATCH 023/101] x86/cpu: Introduce and use CPUID leaf 0x2 parsing helpers Introduce CPUID leaf 0x2 parsing helpers at . This allows sharing the leaf 0x2's output validation and iteration logic across both x86/cpu intel.c and cacheinfo.c. Start by converting intel.c to the new API. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-4-darwi@linutronix.de --- arch/x86/include/asm/cpuid.h | 1 + arch/x86/include/asm/cpuid/leaf_0x2_api.h | 65 +++++++++++++++++++++++ arch/x86/include/asm/cpuid/types.h | 16 ++++++ arch/x86/kernel/cpu/intel.c | 23 +++----- 4 files changed, 88 insertions(+), 17 deletions(-) create mode 100644 arch/x86/include/asm/cpuid/leaf_0x2_api.h diff --git a/arch/x86/include/asm/cpuid.h b/arch/x86/include/asm/cpuid.h index d5749b25fa10..585819331dc6 100644 --- a/arch/x86/include/asm/cpuid.h +++ b/arch/x86/include/asm/cpuid.h @@ -4,5 +4,6 @@ #define _ASM_X86_CPUID_H #include +#include #endif /* _ASM_X86_CPUID_H */ diff --git a/arch/x86/include/asm/cpuid/leaf_0x2_api.h b/arch/x86/include/asm/cpuid/leaf_0x2_api.h new file mode 100644 index 000000000000..4c845fc96716 --- /dev/null +++ b/arch/x86/include/asm/cpuid/leaf_0x2_api.h @@ -0,0 +1,65 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_CPUID_LEAF_0x2_API_H +#define _ASM_X86_CPUID_LEAF_0x2_API_H + +#include +#include + +/** + * cpuid_get_leaf_0x2_regs() - Return sanitized leaf 0x2 register output + * @regs: Output parameter + * + * Query CPUID leaf 0x2 and store its output in @regs. Force set any + * invalid 1-byte descriptor returned by the hardware to zero (the NULL + * cache/TLB descriptor) before returning it to the caller. + * + * Use for_each_leaf_0x2_desc() to iterate over the returned output. + */ +static inline void cpuid_get_leaf_0x2_regs(union leaf_0x2_regs *regs) +{ + cpuid_leaf(0x2, regs); + + /* + * All Intel CPUs must report an iteration count of 1. In case + * of bogus hardware, treat all returned descriptors as NULL. + */ + if (regs->desc[0] != 0x01) { + for (int i = 0; i < 4; i++) + regs->regv[i] = 0; + return; + } + + /* + * The most significant bit (MSB) of each register must be clear. + * If a register is invalid, replace its descriptors with NULL. + */ + for (int i = 0; i < 4; i++) { + if (regs->reg[i].invalid) + regs->regv[i] = 0; + } +} + +/** + * for_each_leaf_0x2_desc() - Iterator for CPUID leaf 0x2 descriptors + * @regs: Leaf 0x2 output, as returned by cpuid_get_leaf_0x2_regs() + * @desc: Pointer to the returned descriptor for each iteration + * + * Loop over the 1-byte descriptors in the passed leaf 0x2 output registers + * @regs. Provide each descriptor through @desc. + * + * Note that the first byte is skipped as it is not a descriptor. + * + * Sample usage:: + * + * union leaf_0x2_regs regs; + * u8 *desc; + * + * cpuid_get_leaf_0x2_regs(®s); + * for_each_leaf_0x2_desc(regs, desc) { + * // Handle *desc value + * } + */ +#define for_each_leaf_0x2_desc(regs, desc) \ + for (desc = &(regs).desc[1]; desc < &(regs).desc[16]; desc++) + +#endif /* _ASM_X86_CPUID_LEAF_0x2_API_H */ diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h index 8582e27e836d..753f6c4514f4 100644 --- a/arch/x86/include/asm/cpuid/types.h +++ b/arch/x86/include/asm/cpuid/types.h @@ -29,4 +29,20 @@ enum cpuid_regs_idx { #define CPUID_LEAF_FREQ 0x16 #define CPUID_LEAF_TILE 0x1d +/* + * Types for CPUID(0x2) parsing + * Check + */ + +struct leaf_0x2_reg { + u32 : 31, + invalid : 1; +}; + +union leaf_0x2_regs { + struct leaf_0x2_reg reg[4]; + u32 regv[4]; + u8 desc[16]; +}; + #endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 0570d4d86006..aeb7d6d48379 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -778,27 +779,15 @@ static void intel_tlb_lookup(const unsigned char desc) static void intel_detect_tlb(struct cpuinfo_x86 *c) { - u32 regs[4]; - u8 *desc = (u8 *)regs; + union leaf_0x2_regs regs; + u8 *desc; if (c->cpuid_level < 2) return; - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* Intel CPUs must report an iteration count of 1 */ - if (desc[0] != 0x01) - return; - - /* If a register's bit 31 is set, it is an unknown format */ - for (int i = 0; i < 4; i++) { - if (regs[i] & (1 << 31)) - regs[i] = 0; - } - - /* Skip the first byte as it is not a descriptor */ - for (int i = 1; i < 16; i++) - intel_tlb_lookup(desc[i]); + cpuid_get_leaf_0x2_regs(®s); + for_each_leaf_0x2_desc(regs, desc) + intel_tlb_lookup(*desc); } static const struct cpu_dev intel_cpu_dev = { From a078aaa38a23a2595addb14ac286d8b8ae793d39 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:32:59 +0100 Subject: [PATCH 024/101] x86/cacheinfo: Use CPUID leaf 0x2 parsing helpers Parent commit introduced CPUID leaf 0x2 parsing helpers at . The new API allows sharing leaf 0x2's output validation and iteration logic across both intel.c and cacheinfo.c. Convert cacheinfo.c to that new API. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-5-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 24 ++++++------------------ 1 file changed, 6 insertions(+), 18 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 36782fd017b3..6c610805e356 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -783,29 +784,16 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) /* Don't use CPUID(2) if CPUID(4) is supported. */ if (!ci->num_leaves && c->cpuid_level > 1) { - u32 regs[4]; - u8 *desc = (u8 *)regs; + union leaf_0x2_regs regs; + u8 *desc; - cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); - - /* Intel CPUs must report an iteration count of 1 */ - if (desc[0] != 0x01) - return; - - /* If a register's bit 31 is set, it is an unknown format */ - for (int i = 0; i < 4; i++) { - if (regs[i] & (1 << 31)) - regs[i] = 0; - } - - /* Skip the first byte as it is not a descriptor */ - for (int i = 1; i < 16; i++) { - u8 des = desc[i]; + cpuid_get_leaf_0x2_regs(®s); + for_each_leaf_0x2_desc(regs, desc) { u8 k = 0; /* look up this descriptor in the table */ while (cache_table[k].descriptor != 0) { - if (cache_table[k].descriptor == des) { + if (cache_table[k].descriptor == *desc) { switch (cache_table[k].cache_type) { case LVL_1_INST: l1i += cache_table[k].size; From ee159792a4db06e359a5e2e231c6f1242ec76c31 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Mar 2025 14:33:00 +0100 Subject: [PATCH 025/101] x86/cacheinfo: Refactor CPUID leaf 0x2 cache descriptor lookup Extract the cache descriptor lookup logic out of the leaf 0x2 parsing code and into a dedicated function. This disentangles such lookup from the deeply nested leaf 0x2 parsing loop. Remove the cache table termination entry, as it is no longer needed after the ARRAY_SIZE()-based lookup. [ darwi: Move refactoring logic into this separate commit + commit log. Remove the cache table termination entry. ] Signed-off-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-6-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 42 +++++++++++++++------------------ 1 file changed, 19 insertions(+), 23 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 6c610805e356..d0bfdb85b96a 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -123,7 +123,6 @@ static const struct _cache_table cache_table[] = { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */ { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */ { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */ - { 0x00, 0, 0} }; @@ -728,6 +727,16 @@ void init_hygon_cacheinfo(struct cpuinfo_x86 *c) ci->num_leaves = find_num_cache_leaves(c); } +static const struct _cache_table *cache_table_get(u8 desc) +{ + for (int i = 0; i < ARRAY_SIZE(cache_table); i++) { + if (cache_table[i].descriptor == desc) + return &cache_table[i]; + } + + return NULL; +} + void init_intel_cacheinfo(struct cpuinfo_x86 *c) { /* Cache sizes */ @@ -784,34 +793,21 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) /* Don't use CPUID(2) if CPUID(4) is supported. */ if (!ci->num_leaves && c->cpuid_level > 1) { + const struct _cache_table *entry; union leaf_0x2_regs regs; u8 *desc; cpuid_get_leaf_0x2_regs(®s); for_each_leaf_0x2_desc(regs, desc) { - u8 k = 0; + entry = cache_table_get(*desc); + if (!entry) + continue; - /* look up this descriptor in the table */ - while (cache_table[k].descriptor != 0) { - if (cache_table[k].descriptor == *desc) { - switch (cache_table[k].cache_type) { - case LVL_1_INST: - l1i += cache_table[k].size; - break; - case LVL_1_DATA: - l1d += cache_table[k].size; - break; - case LVL_2: - l2 += cache_table[k].size; - break; - case LVL_3: - l3 += cache_table[k].size; - break; - } - - break; - } - k++; + switch (entry->cache_type) { + case LVL_1_INST: l1i += entry->size; break; + case LVL_1_DATA: l1d += entry->size; break; + case LVL_2: l2 += entry->size; break; + case LVL_3: l3 += entry->size; break; } } } From 21e2a452dca34538db0731b45c113cfd31aa45e6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Mar 2025 14:33:01 +0100 Subject: [PATCH 026/101] x86/cacheinfo: Properly name amd_cpuid4()'s first parameter amd_cpuid4()'s first parameter, "leaf", is not a CPUID leaf as the name implies. Rather, it's an index emulating CPUID(4)'s subleaf semantics; i.e. an ID for the cache object currently enumerated. Rename that parameter to "index". Apply minor coding style fixes to the rest of the function as well. [ darwi: Move into a separate commit and write commit log. Use "index" instead of "subleaf" for amd_cpuid4() first param, as that's the name typically used at the whole of cacheinfo.c. ] Signed-off-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-7-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index d0bfdb85b96a..0fd4e9673665 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -233,12 +233,10 @@ static const enum cache_type cache_type_map[] = { }; static void -amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, - union _cpuid4_leaf_ecx *ecx) +amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx) { - unsigned dummy; - unsigned line_size, lines_per_tag, assoc, size_in_kb; + unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb; union l1_cache l1i, l1d; union l2_cache l2; union l3_cache l3; @@ -251,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); - switch (leaf) { + switch (index) { case 1: l1 = &l1i; fallthrough; @@ -289,12 +287,11 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, } eax->split.is_self_initializing = 1; - eax->split.type = types[leaf]; - eax->split.level = levels[leaf]; + eax->split.type = types[index]; + eax->split.level = levels[index]; eax->split.num_threads_sharing = 0; eax->split.num_cores_on_die = topology_num_cores_per_package(); - if (assoc == 0xffff) eax->split.is_fully_associative = 1; ebx->split.coherency_line_size = line_size - 1; From cf8758205264707b4521163c0a37bf587c4425c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Mar 2025 14:33:02 +0100 Subject: [PATCH 027/101] x86/cacheinfo: Use proper name for cacheinfo instances The cacheinfo structure defined at is a generic cache info object representation. Calling its instances at x86 cacheinfo.c "leaf" confuses it with a CPUID leaf -- especially that multiple CPUID calls are already sprinkled across that file. Most of such instances also have a redundant "this_" prefix. Rename all of the cacheinfo "this_leaf" instances to just "ci". [ darwi: Move into separate commit and write commit log ] Signed-off-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-8-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 89 ++++++++++++++++----------------- 1 file changed, 43 insertions(+), 46 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 0fd4e9673665..be9be5e56b44 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -349,11 +349,10 @@ static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) return -1; } -static ssize_t show_cache_disable(struct cacheinfo *this_leaf, char *buf, - unsigned int slot) +static ssize_t show_cache_disable(struct cacheinfo *ci, char *buf, unsigned int slot) { int index; - struct amd_northbridge *nb = this_leaf->priv; + struct amd_northbridge *nb = ci->priv; index = amd_get_l3_disable_slot(nb, slot); if (index >= 0) @@ -367,8 +366,8 @@ static ssize_t \ cache_disable_##slot##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ { \ - struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ - return show_cache_disable(this_leaf, buf, slot); \ + struct cacheinfo *ci = dev_get_drvdata(dev); \ + return show_cache_disable(ci, buf, slot); \ } SHOW_CACHE_DISABLE(0) SHOW_CACHE_DISABLE(1) @@ -435,18 +434,17 @@ static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, return 0; } -static ssize_t store_cache_disable(struct cacheinfo *this_leaf, - const char *buf, size_t count, - unsigned int slot) +static ssize_t store_cache_disable(struct cacheinfo *ci, const char *buf, + size_t count, unsigned int slot) { unsigned long val = 0; int cpu, err = 0; - struct amd_northbridge *nb = this_leaf->priv; + struct amd_northbridge *nb = ci->priv; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - cpu = cpumask_first(&this_leaf->shared_cpu_map); + cpu = cpumask_first(&ci->shared_cpu_map); if (kstrtoul(buf, 10, &val) < 0) return -EINVAL; @@ -467,8 +465,8 @@ cache_disable_##slot##_store(struct device *dev, \ struct device_attribute *attr, \ const char *buf, size_t count) \ { \ - struct cacheinfo *this_leaf = dev_get_drvdata(dev); \ - return store_cache_disable(this_leaf, buf, count, slot); \ + struct cacheinfo *ci = dev_get_drvdata(dev); \ + return store_cache_disable(ci, buf, count, slot); \ } STORE_CACHE_DISABLE(0) STORE_CACHE_DISABLE(1) @@ -476,8 +474,8 @@ STORE_CACHE_DISABLE(1) static ssize_t subcaches_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct cacheinfo *this_leaf = dev_get_drvdata(dev); - int cpu = cpumask_first(&this_leaf->shared_cpu_map); + struct cacheinfo *ci = dev_get_drvdata(dev); + int cpu = cpumask_first(&ci->shared_cpu_map); return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); } @@ -486,8 +484,8 @@ static ssize_t subcaches_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { - struct cacheinfo *this_leaf = dev_get_drvdata(dev); - int cpu = cpumask_first(&this_leaf->shared_cpu_map); + struct cacheinfo *ci = dev_get_drvdata(dev); + int cpu = cpumask_first(&ci->shared_cpu_map); unsigned long val; if (!capable(CAP_SYS_ADMIN)) @@ -511,10 +509,10 @@ cache_private_attrs_is_visible(struct kobject *kobj, struct attribute *attr, int unused) { struct device *dev = kobj_to_dev(kobj); - struct cacheinfo *this_leaf = dev_get_drvdata(dev); + struct cacheinfo *ci = dev_get_drvdata(dev); umode_t mode = attr->mode; - if (!this_leaf->priv) + if (!ci->priv) return 0; if ((attr == &dev_attr_subcaches.attr) && @@ -562,11 +560,11 @@ static void init_amd_l3_attrs(void) } const struct attribute_group * -cache_get_priv_group(struct cacheinfo *this_leaf) +cache_get_priv_group(struct cacheinfo *ci) { - struct amd_northbridge *nb = this_leaf->priv; + struct amd_northbridge *nb = ci->priv; - if (this_leaf->level < 3 || !nb) + if (ci->level < 3 || !nb) return NULL; if (nb && nb->l3_cache.indices) @@ -846,7 +844,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, struct _cpuid4_info_regs *base) { struct cpu_cacheinfo *this_cpu_ci; - struct cacheinfo *this_leaf; + struct cacheinfo *ci; int i, sibling; /* @@ -858,12 +856,12 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, this_cpu_ci = get_cpu_cacheinfo(i); if (!this_cpu_ci->info_list) continue; - this_leaf = this_cpu_ci->info_list + index; + ci = this_cpu_ci->info_list + index; for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { if (!cpu_online(sibling)) continue; cpumask_set_cpu(sibling, - &this_leaf->shared_cpu_map); + &ci->shared_cpu_map); } } } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { @@ -883,14 +881,14 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, if ((apicid < first) || (apicid > last)) continue; - this_leaf = this_cpu_ci->info_list + index; + ci = this_cpu_ci->info_list + index; for_each_online_cpu(sibling) { apicid = cpu_data(sibling).topo.apicid; if ((apicid < first) || (apicid > last)) continue; cpumask_set_cpu(sibling, - &this_leaf->shared_cpu_map); + &ci->shared_cpu_map); } } } else @@ -903,7 +901,7 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, struct _cpuid4_info_regs *base) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf, *sibling_leaf; + struct cacheinfo *ci, *sibling_ci; unsigned long num_threads_sharing; int index_msb, i; struct cpuinfo_x86 *c = &cpu_data(cpu); @@ -914,10 +912,10 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, return; } - this_leaf = this_cpu_ci->info_list + index; + ci = this_cpu_ci->info_list + index; num_threads_sharing = 1 + base->eax.split.num_threads_sharing; - cpumask_set_cpu(cpu, &this_leaf->shared_cpu_map); + cpumask_set_cpu(cpu, &ci->shared_cpu_map); if (num_threads_sharing == 1) return; @@ -929,28 +927,27 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, if (i == cpu || !sib_cpu_ci->info_list) continue;/* skip if itself or no cacheinfo */ - sibling_leaf = sib_cpu_ci->info_list + index; - cpumask_set_cpu(i, &this_leaf->shared_cpu_map); - cpumask_set_cpu(cpu, &sibling_leaf->shared_cpu_map); + sibling_ci = sib_cpu_ci->info_list + index; + cpumask_set_cpu(i, &ci->shared_cpu_map); + cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map); } } -static void ci_leaf_init(struct cacheinfo *this_leaf, - struct _cpuid4_info_regs *base) +static void ci_info_init(struct cacheinfo *ci, struct _cpuid4_info_regs *base) { - this_leaf->id = base->id; - this_leaf->attributes = CACHE_ID; - this_leaf->level = base->eax.split.level; - this_leaf->type = cache_type_map[base->eax.split.type]; - this_leaf->coherency_line_size = + ci->id = base->id; + ci->attributes = CACHE_ID; + ci->level = base->eax.split.level; + ci->type = cache_type_map[base->eax.split.type]; + ci->coherency_line_size = base->ebx.split.coherency_line_size + 1; - this_leaf->ways_of_associativity = + ci->ways_of_associativity = base->ebx.split.ways_of_associativity + 1; - this_leaf->size = base->size; - this_leaf->number_of_sets = base->ecx.split.number_of_sets + 1; - this_leaf->physical_line_partition = + ci->size = base->size; + ci->number_of_sets = base->ecx.split.number_of_sets + 1; + ci->physical_line_partition = base->ebx.split.physical_line_partition + 1; - this_leaf->priv = base->nb; + ci->priv = base->nb; } int init_cache_level(unsigned int cpu) @@ -984,7 +981,7 @@ int populate_cache_leaves(unsigned int cpu) { unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - struct cacheinfo *this_leaf = this_cpu_ci->info_list; + struct cacheinfo *ci = this_cpu_ci->info_list; struct _cpuid4_info_regs id4_regs = {}; for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { @@ -992,7 +989,7 @@ int populate_cache_leaves(unsigned int cpu) if (ret) return ret; get_cache_id(cpu, &id4_regs); - ci_leaf_init(this_leaf++, &id4_regs); + ci_info_init(ci++, &id4_regs); __cache_cpumap_setup(cpu, idx, &id4_regs); } this_cpu_ci->cpu_map_populated = true; From 7b83e0d2b20b38a025d15ba88516c1b8fe9c6f3d Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:03 +0100 Subject: [PATCH 028/101] x86/cacheinfo: Constify _cpuid4_info_regs instances _cpuid4_info_regs instances are passed through a large number of functions at cacheinfo.c. For clarity, constify the instance parameters where _cpuid4_info_regs is only read from. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-9-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index be9be5e56b44..fc4b49ec42df 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -841,7 +841,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) } static int __cache_amd_cpumap_setup(unsigned int cpu, int index, - struct _cpuid4_info_regs *base) + const struct _cpuid4_info_regs *base) { struct cpu_cacheinfo *this_cpu_ci; struct cacheinfo *ci; @@ -898,7 +898,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } static void __cache_cpumap_setup(unsigned int cpu, int index, - struct _cpuid4_info_regs *base) + const struct _cpuid4_info_regs *base) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *ci, *sibling_ci; @@ -933,7 +933,8 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, } } -static void ci_info_init(struct cacheinfo *ci, struct _cpuid4_info_regs *base) +static void ci_info_init(struct cacheinfo *ci, + const struct _cpuid4_info_regs *base) { ci->id = base->id; ci->attributes = CACHE_ID; From 036a73b5174477b72d881b4c75740d3dbfd7ec49 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:04 +0100 Subject: [PATCH 029/101] x86/cacheinfo: Align ci_info_init() assignment expressions The ci_info_init() function initializes 10 members of a 'struct cacheinfo' instance using passed data from CPUID leaf 0x4. Such assignment expressions are difficult to read in their current form. Align them for clarity. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-10-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index fc4b49ec42df..b273ecf3f538 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -936,19 +936,16 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info_regs *base) { - ci->id = base->id; - ci->attributes = CACHE_ID; - ci->level = base->eax.split.level; - ci->type = cache_type_map[base->eax.split.type]; - ci->coherency_line_size = - base->ebx.split.coherency_line_size + 1; - ci->ways_of_associativity = - base->ebx.split.ways_of_associativity + 1; - ci->size = base->size; - ci->number_of_sets = base->ecx.split.number_of_sets + 1; - ci->physical_line_partition = - base->ebx.split.physical_line_partition + 1; - ci->priv = base->nb; + ci->id = base->id; + ci->attributes = CACHE_ID; + ci->level = base->eax.split.level; + ci->type = cache_type_map[base->eax.split.type]; + ci->coherency_line_size = base->ebx.split.coherency_line_size + 1; + ci->ways_of_associativity = base->ebx.split.ways_of_associativity + 1; + ci->size = base->size; + ci->number_of_sets = base->ecx.split.number_of_sets + 1; + ci->physical_line_partition = base->ebx.split.physical_line_partition + 1; + ci->priv = base->nb; } int init_cache_level(unsigned int cpu) From 1374ff60ed0d4cedd55f653edd3f59f9f19c4171 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:05 +0100 Subject: [PATCH 030/101] x86/cacheinfo: Standardize _cpuid4_info_regs instance naming The cacheinfo code frequently uses the output registers from CPUID leaf 0x4. Such registers are cached in 'struct _cpuid4_info_regs', augmented with related information, and are then passed across functions. The naming of these _cpuid4_info_regs instances is confusing at best. Some instances are called "this_leaf", which is vague as "this" lacks context and "leaf" is overly generic given that other CPUID leaves are also processed within cacheinfo. Other _cpuid4_info_regs instances are just called "base", adding further ambiguity. Standardize on id4 for all instances. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-11-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 97 +++++++++++++++++---------------- 1 file changed, 49 insertions(+), 48 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index b273ecf3f538..1b2a2bf97d7f 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -573,7 +573,7 @@ cache_get_priv_group(struct cacheinfo *ci) return &cache_private_group; } -static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) +static void amd_init_l3_cache(struct _cpuid4_info_regs *id4, int index) { int node; @@ -582,16 +582,16 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) return; node = topology_amd_node_id(smp_processor_id()); - this_leaf->nb = node_to_amd_nb(node); - if (this_leaf->nb && !this_leaf->nb->l3_cache.indices) - amd_calc_l3_indices(this_leaf->nb); + id4->nb = node_to_amd_nb(node); + if (id4->nb && !id4->nb->l3_cache.indices) + amd_calc_l3_indices(id4->nb); } #else #define amd_init_l3_cache(x, y) #endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ static int -cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) +cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *id4) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -604,11 +604,11 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) &ebx.full, &ecx.full, &edx); else amd_cpuid4(index, &eax, &ebx, &ecx); - amd_init_l3_cache(this_leaf, index); + amd_init_l3_cache(id4, index); } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &edx); - amd_init_l3_cache(this_leaf, index); + amd_init_l3_cache(id4, index); } else { cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); } @@ -616,13 +616,14 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *this_leaf) if (eax.split.type == CTYPE_NULL) return -EIO; /* better error ? */ - this_leaf->eax = eax; - this_leaf->ebx = ebx; - this_leaf->ecx = ecx; - this_leaf->size = (ecx.split.number_of_sets + 1) * - (ebx.split.coherency_line_size + 1) * - (ebx.split.physical_line_partition + 1) * - (ebx.split.ways_of_associativity + 1); + id4->eax = eax; + id4->ebx = ebx; + id4->ecx = ecx; + id4->size = (ecx.split.number_of_sets + 1) * + (ebx.split.coherency_line_size + 1) * + (ebx.split.physical_line_partition + 1) * + (ebx.split.ways_of_associativity + 1); + return 0; } @@ -754,29 +755,29 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < ci->num_leaves; i++) { - struct _cpuid4_info_regs this_leaf = {}; + struct _cpuid4_info_regs id4 = {}; int retval; - retval = cpuid4_cache_lookup_regs(i, &this_leaf); + retval = cpuid4_cache_lookup_regs(i, &id4); if (retval < 0) continue; - switch (this_leaf.eax.split.level) { + switch (id4.eax.split.level) { case 1: - if (this_leaf.eax.split.type == CTYPE_DATA) - new_l1d = this_leaf.size/1024; - else if (this_leaf.eax.split.type == CTYPE_INST) - new_l1i = this_leaf.size/1024; + if (id4.eax.split.type == CTYPE_DATA) + new_l1d = id4.size/1024; + else if (id4.eax.split.type == CTYPE_INST) + new_l1i = id4.size/1024; break; case 2: - new_l2 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + new_l2 = id4.size/1024; + num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); l2_id = c->topo.apicid & ~((1 << index_msb) - 1); break; case 3: - new_l3 = this_leaf.size/1024; - num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; + new_l3 = id4.size/1024; + num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); l3_id = c->topo.apicid & ~((1 << index_msb) - 1); break; @@ -841,7 +842,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) } static int __cache_amd_cpumap_setup(unsigned int cpu, int index, - const struct _cpuid4_info_regs *base) + const struct _cpuid4_info_regs *id4) { struct cpu_cacheinfo *this_cpu_ci; struct cacheinfo *ci; @@ -867,7 +868,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { unsigned int apicid, nshared, first, last; - nshared = base->eax.split.num_threads_sharing + 1; + nshared = id4->eax.split.num_threads_sharing + 1; apicid = cpu_data(cpu).topo.apicid; first = apicid - (apicid % nshared); last = first + nshared - 1; @@ -898,7 +899,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } static void __cache_cpumap_setup(unsigned int cpu, int index, - const struct _cpuid4_info_regs *base) + const struct _cpuid4_info_regs *id4) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *ci, *sibling_ci; @@ -908,12 +909,12 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) { - if (__cache_amd_cpumap_setup(cpu, index, base)) + if (__cache_amd_cpumap_setup(cpu, index, id4)) return; } ci = this_cpu_ci->info_list + index; - num_threads_sharing = 1 + base->eax.split.num_threads_sharing; + num_threads_sharing = 1 + id4->eax.split.num_threads_sharing; cpumask_set_cpu(cpu, &ci->shared_cpu_map); if (num_threads_sharing == 1) @@ -934,18 +935,18 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, } static void ci_info_init(struct cacheinfo *ci, - const struct _cpuid4_info_regs *base) + const struct _cpuid4_info_regs *id4) { - ci->id = base->id; + ci->id = id4->id; ci->attributes = CACHE_ID; - ci->level = base->eax.split.level; - ci->type = cache_type_map[base->eax.split.type]; - ci->coherency_line_size = base->ebx.split.coherency_line_size + 1; - ci->ways_of_associativity = base->ebx.split.ways_of_associativity + 1; - ci->size = base->size; - ci->number_of_sets = base->ecx.split.number_of_sets + 1; - ci->physical_line_partition = base->ebx.split.physical_line_partition + 1; - ci->priv = base->nb; + ci->level = id4->eax.split.level; + ci->type = cache_type_map[id4->eax.split.type]; + ci->coherency_line_size = id4->ebx.split.coherency_line_size + 1; + ci->ways_of_associativity = id4->ebx.split.ways_of_associativity + 1; + ci->size = id4->size; + ci->number_of_sets = id4->ecx.split.number_of_sets + 1; + ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1; + ci->priv = id4->nb; } int init_cache_level(unsigned int cpu) @@ -964,15 +965,15 @@ int init_cache_level(unsigned int cpu) * ECX as cache index. Then right shift apicid by the number's order to get * cache id for this cache node. */ -static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4_regs) +static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4) { struct cpuinfo_x86 *c = &cpu_data(cpu); unsigned long num_threads_sharing; int index_msb; - num_threads_sharing = 1 + id4_regs->eax.split.num_threads_sharing; + num_threads_sharing = 1 + id4->eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); - id4_regs->id = c->topo.apicid >> index_msb; + id4->id = c->topo.apicid >> index_msb; } int populate_cache_leaves(unsigned int cpu) @@ -980,15 +981,15 @@ int populate_cache_leaves(unsigned int cpu) unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *ci = this_cpu_ci->info_list; - struct _cpuid4_info_regs id4_regs = {}; + struct _cpuid4_info_regs id4 = {}; for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { - ret = cpuid4_cache_lookup_regs(idx, &id4_regs); + ret = cpuid4_cache_lookup_regs(idx, &id4); if (ret) return ret; - get_cache_id(cpu, &id4_regs); - ci_info_init(ci++, &id4_regs); - __cache_cpumap_setup(cpu, idx, &id4_regs); + get_cache_id(cpu, &id4); + ci_info_init(ci++, &id4); + __cache_cpumap_setup(cpu, idx, &id4); } this_cpu_ci->cpu_map_populated = true; From 77676e6802a10ffa5a0ad6367e8f6e14cbd88781 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:06 +0100 Subject: [PATCH 031/101] x86/cacheinfo: Consolidate AMD/Hygon leaf 0x8000001d calls While gathering CPU cache info, CPUID leaf 0x8000001d is invoked in two separate if blocks: one for Hygon CPUs and one for AMDs with topology extensions. After each invocation, amd_init_l3_cache() is called. Merge the two if blocks into a single condition, thus removing the duplicated code. Future commits will expand these if blocks, so combining them now is both cleaner and more maintainable. Note, while at it, remove a useless "better error?" comment that was within the same function since the 2005 commit e2cac78935ff ("[PATCH] x86_64: When running cpuid4 need to run on the correct CPU"). Note, as previously done at commit aec28d852ed2 ("x86/cpuid: Standardize on u32 in "), standardize on using 'u32' and 'u8' types. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-12-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 1b2a2bf97d7f..f1055e806c9f 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -593,28 +593,28 @@ static void amd_init_l3_cache(struct _cpuid4_info_regs *id4, int index) static int cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *id4) { - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - unsigned edx; + u8 cpu_vendor = boot_cpu_data.x86_vendor; + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + u32 edx; - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) { - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) - cpuid_count(0x8000001d, index, &eax.full, - &ebx.full, &ecx.full, &edx); - else + if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT) || cpu_vendor == X86_VENDOR_HYGON) { + /* AMD with TOPOEXT, or HYGON */ + cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &edx); + } else { + /* Legacy AMD fallback */ amd_cpuid4(index, &eax, &ebx, &ecx); - amd_init_l3_cache(id4, index); - } else if (boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) { - cpuid_count(0x8000001d, index, &eax.full, - &ebx.full, &ecx.full, &edx); + } amd_init_l3_cache(id4, index); } else { + /* Intel */ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); } if (eax.split.type == CTYPE_NULL) - return -EIO; /* better error ? */ + return -EIO; id4->eax = eax; id4->ebx = ebx; From c58ed2d4da8dced3fa4505f498bd393f565b471a Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:07 +0100 Subject: [PATCH 032/101] x86/cacheinfo: Separate amd_northbridge from _cpuid4_info_regs 'struct _cpuid4_info_regs' is meant to hold the CPUID leaf 0x4 output registers (EAX, EBX, and ECX), as well as derived information such as the cache node ID and size. It also contains a reference to amd_northbridge, which is there only to be "parked" until ci_info_init() can store it in the priv pointer of the API. That priv pointer is then used by AMD-specific L3 cache_disable_0/1 sysfs attributes. Decouple amd_northbridge from _cpuid4_info_regs and pass it explicitly through the functions at x86/cacheinfo. Doing so clarifies when amd_northbridge is actually needed (AMD-only code) and when it is not (Intel-specific code). It also prepares for moving the AMD-specific L3 cache_disable_0/1 sysfs code into its own file in next commit. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-13-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 45 +++++++++++++++++++++------------ 1 file changed, 29 insertions(+), 16 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index f1055e806c9f..8c2b51bb032a 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -168,7 +168,6 @@ struct _cpuid4_info_regs { union _cpuid4_leaf_ecx ecx; unsigned int id; unsigned long size; - struct amd_northbridge *nb; }; /* AMD doesn't have CPUID4. Emulate it here to report the same @@ -573,25 +572,36 @@ cache_get_priv_group(struct cacheinfo *ci) return &cache_private_group; } -static void amd_init_l3_cache(struct _cpuid4_info_regs *id4, int index) +static struct amd_northbridge *amd_init_l3_cache(int index) { + struct amd_northbridge *nb; int node; /* only for L3, and not in virtualized environments */ if (index < 3) - return; + return NULL; node = topology_amd_node_id(smp_processor_id()); - id4->nb = node_to_amd_nb(node); - if (id4->nb && !id4->nb->l3_cache.indices) - amd_calc_l3_indices(id4->nb); + nb = node_to_amd_nb(node); + if (nb && !nb->l3_cache.indices) + amd_calc_l3_indices(nb); + + return nb; } #else -#define amd_init_l3_cache(x, y) +static struct amd_northbridge *amd_init_l3_cache(int index) +{ + return NULL; +} #endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ -static int -cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *id4) +/* + * Fill passed _cpuid4_info_regs structure. + * Intel-only code paths should pass NULL for the amd_northbridge + * return pointer. + */ +static int cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *id4, + struct amd_northbridge **nb) { u8 cpu_vendor = boot_cpu_data.x86_vendor; union _cpuid4_leaf_eax eax; @@ -607,7 +617,9 @@ cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *id4) /* Legacy AMD fallback */ amd_cpuid4(index, &eax, &ebx, &ecx); } - amd_init_l3_cache(id4, index); + + if (nb) + *nb = amd_init_l3_cache(index); } else { /* Intel */ cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); @@ -758,7 +770,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) struct _cpuid4_info_regs id4 = {}; int retval; - retval = cpuid4_cache_lookup_regs(i, &id4); + retval = cpuid4_cache_lookup_regs(i, &id4, NULL); if (retval < 0) continue; @@ -934,8 +946,8 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, } } -static void ci_info_init(struct cacheinfo *ci, - const struct _cpuid4_info_regs *id4) +static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info_regs *id4, + struct amd_northbridge *nb) { ci->id = id4->id; ci->attributes = CACHE_ID; @@ -946,7 +958,7 @@ static void ci_info_init(struct cacheinfo *ci, ci->size = id4->size; ci->number_of_sets = id4->ecx.split.number_of_sets + 1; ci->physical_line_partition = id4->ebx.split.physical_line_partition + 1; - ci->priv = id4->nb; + ci->priv = nb; } int init_cache_level(unsigned int cpu) @@ -982,13 +994,14 @@ int populate_cache_leaves(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *ci = this_cpu_ci->info_list; struct _cpuid4_info_regs id4 = {}; + struct amd_northbridge *nb; for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { - ret = cpuid4_cache_lookup_regs(idx, &id4); + ret = cpuid4_cache_lookup_regs(idx, &id4, &nb); if (ret) return ret; get_cache_id(cpu, &id4); - ci_info_init(ci++, &id4); + ci_info_init(ci++, &id4, nb); __cache_cpumap_setup(cpu, idx, &id4); } this_cpu_ci->cpu_map_populated = true; From 365e960d296ef9f36b20971aa4854ce07817a9bb Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:08 +0100 Subject: [PATCH 033/101] x86/cacheinfo: Move AMD cache_disable_0/1 handling to separate file Parent commit decoupled amd_northbridge out of _cpuid4_info_regs, where it was merely "parked" there until ci_info_init() can store it in the private pointer of the API. Given that decoupling, move the AMD-specific L3 cache_disable_0/1 sysfs code from the generic (and already extremely convoluted) x86/cacheinfo code into its own file. Compile the file only if CONFIG_AMD_NB and CONFIG_SYSFS are both enabled, which mirrors the existing logic. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-14-darwi@linutronix.de --- arch/x86/kernel/cpu/Makefile | 3 + arch/x86/kernel/cpu/amd_cache_disable.c | 301 ++++++++++++++++++++++++ arch/x86/kernel/cpu/cacheinfo.c | 298 ----------------------- arch/x86/kernel/cpu/cpu.h | 9 + 4 files changed, 313 insertions(+), 298 deletions(-) create mode 100644 arch/x86/kernel/cpu/amd_cache_disable.c diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 4efdf5c2efc8..3a39396d422d 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -38,6 +38,9 @@ obj-y += intel.o tsx.o obj-$(CONFIG_PM) += intel_epb.o endif obj-$(CONFIG_CPU_SUP_AMD) += amd.o +ifeq ($(CONFIG_AMD_NB)$(CONFIG_SYSFS),yy) +obj-y += amd_cache_disable.o +endif obj-$(CONFIG_CPU_SUP_HYGON) += hygon.o obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o diff --git a/arch/x86/kernel/cpu/amd_cache_disable.c b/arch/x86/kernel/cpu/amd_cache_disable.c new file mode 100644 index 000000000000..6d53aee0d869 --- /dev/null +++ b/arch/x86/kernel/cpu/amd_cache_disable.c @@ -0,0 +1,301 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * AMD L3 cache_disable_{0,1} sysfs handling + * Documentation/ABI/testing/sysfs-devices-system-cpu + */ + +#include +#include +#include +#include + +#include + +#include "cpu.h" + +/* + * L3 cache descriptors + */ +static void amd_calc_l3_indices(struct amd_northbridge *nb) +{ + struct amd_l3_cache *l3 = &nb->l3_cache; + unsigned int sc0, sc1, sc2, sc3; + u32 val = 0; + + pci_read_config_dword(nb->misc, 0x1C4, &val); + + /* calculate subcache sizes */ + l3->subcaches[0] = sc0 = !(val & BIT(0)); + l3->subcaches[1] = sc1 = !(val & BIT(4)); + + if (boot_cpu_data.x86 == 0x15) { + l3->subcaches[0] = sc0 += !(val & BIT(1)); + l3->subcaches[1] = sc1 += !(val & BIT(5)); + } + + l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); + l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); + + l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; +} + +/* + * check whether a slot used for disabling an L3 index is occupied. + * @l3: L3 cache descriptor + * @slot: slot number (0..1) + * + * @returns: the disabled index if used or negative value if slot free. + */ +static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned int slot) +{ + unsigned int reg = 0; + + pci_read_config_dword(nb->misc, 0x1BC + slot * 4, ®); + + /* check whether this slot is activated already */ + if (reg & (3UL << 30)) + return reg & 0xfff; + + return -1; +} + +static ssize_t show_cache_disable(struct cacheinfo *ci, char *buf, unsigned int slot) +{ + int index; + struct amd_northbridge *nb = ci->priv; + + index = amd_get_l3_disable_slot(nb, slot); + if (index >= 0) + return sprintf(buf, "%d\n", index); + + return sprintf(buf, "FREE\n"); +} + +#define SHOW_CACHE_DISABLE(slot) \ +static ssize_t \ +cache_disable_##slot##_show(struct device *dev, \ + struct device_attribute *attr, char *buf) \ +{ \ + struct cacheinfo *ci = dev_get_drvdata(dev); \ + return show_cache_disable(ci, buf, slot); \ +} + +SHOW_CACHE_DISABLE(0) +SHOW_CACHE_DISABLE(1) + +static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, + unsigned int slot, unsigned long idx) +{ + int i; + + idx |= BIT(30); + + /* + * disable index in all 4 subcaches + */ + for (i = 0; i < 4; i++) { + u32 reg = idx | (i << 20); + + if (!nb->l3_cache.subcaches[i]) + continue; + + pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); + + /* + * We need to WBINVD on a core on the node containing the L3 + * cache which indices we disable therefore a simple wbinvd() + * is not sufficient. + */ + wbinvd_on_cpu(cpu); + + reg |= BIT(31); + pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); + } +} + +/* + * disable a L3 cache index by using a disable-slot + * + * @l3: L3 cache descriptor + * @cpu: A CPU on the node containing the L3 cache + * @slot: slot number (0..1) + * @index: index to disable + * + * @return: 0 on success, error status on failure + */ +static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, + unsigned int slot, unsigned long index) +{ + int ret = 0; + + /* check if @slot is already used or the index is already disabled */ + ret = amd_get_l3_disable_slot(nb, slot); + if (ret >= 0) + return -EEXIST; + + if (index > nb->l3_cache.indices) + return -EINVAL; + + /* check whether the other slot has disabled the same index already */ + if (index == amd_get_l3_disable_slot(nb, !slot)) + return -EEXIST; + + amd_l3_disable_index(nb, cpu, slot, index); + + return 0; +} + +static ssize_t store_cache_disable(struct cacheinfo *ci, const char *buf, + size_t count, unsigned int slot) +{ + struct amd_northbridge *nb = ci->priv; + unsigned long val = 0; + int cpu, err = 0; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + cpu = cpumask_first(&ci->shared_cpu_map); + + if (kstrtoul(buf, 10, &val) < 0) + return -EINVAL; + + err = amd_set_l3_disable_slot(nb, cpu, slot, val); + if (err) { + if (err == -EEXIST) + pr_warn("L3 slot %d in use/index already disabled!\n", + slot); + return err; + } + return count; +} + +#define STORE_CACHE_DISABLE(slot) \ +static ssize_t \ +cache_disable_##slot##_store(struct device *dev, \ + struct device_attribute *attr, \ + const char *buf, size_t count) \ +{ \ + struct cacheinfo *ci = dev_get_drvdata(dev); \ + return store_cache_disable(ci, buf, count, slot); \ +} + +STORE_CACHE_DISABLE(0) +STORE_CACHE_DISABLE(1) + +static ssize_t subcaches_show(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct cacheinfo *ci = dev_get_drvdata(dev); + int cpu = cpumask_first(&ci->shared_cpu_map); + + return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); +} + +static ssize_t subcaches_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct cacheinfo *ci = dev_get_drvdata(dev); + int cpu = cpumask_first(&ci->shared_cpu_map); + unsigned long val; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + if (kstrtoul(buf, 16, &val) < 0) + return -EINVAL; + + if (amd_set_subcaches(cpu, val)) + return -EINVAL; + + return count; +} + +static DEVICE_ATTR_RW(cache_disable_0); +static DEVICE_ATTR_RW(cache_disable_1); +static DEVICE_ATTR_RW(subcaches); + +static umode_t cache_private_attrs_is_visible(struct kobject *kobj, + struct attribute *attr, int unused) +{ + struct device *dev = kobj_to_dev(kobj); + struct cacheinfo *ci = dev_get_drvdata(dev); + umode_t mode = attr->mode; + + if (!ci->priv) + return 0; + + if ((attr == &dev_attr_subcaches.attr) && + amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + return mode; + + if ((attr == &dev_attr_cache_disable_0.attr || + attr == &dev_attr_cache_disable_1.attr) && + amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + return mode; + + return 0; +} + +static struct attribute_group cache_private_group = { + .is_visible = cache_private_attrs_is_visible, +}; + +static void init_amd_l3_attrs(void) +{ + static struct attribute **amd_l3_attrs; + int n = 1; + + if (amd_l3_attrs) /* already initialized */ + return; + + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) + n += 2; + if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + n += 1; + + amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL); + if (!amd_l3_attrs) + return; + + n = 0; + if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { + amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr; + amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr; + } + if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) + amd_l3_attrs[n++] = &dev_attr_subcaches.attr; + + cache_private_group.attrs = amd_l3_attrs; +} + +const struct attribute_group *cache_get_priv_group(struct cacheinfo *ci) +{ + struct amd_northbridge *nb = ci->priv; + + if (ci->level < 3 || !nb) + return NULL; + + if (nb && nb->l3_cache.indices) + init_amd_l3_attrs(); + + return &cache_private_group; +} + +struct amd_northbridge *amd_init_l3_cache(int index) +{ + struct amd_northbridge *nb; + int node; + + /* only for L3, and not in virtualized environments */ + if (index < 3) + return NULL; + + node = topology_amd_node_id(smp_processor_id()); + nb = node_to_amd_nb(node); + if (nb && !nb->l3_cache.indices) + amd_calc_l3_indices(nb); + + return nb; +} diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 8c2b51bb032a..ea6fba942d27 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -9,12 +9,9 @@ */ #include -#include #include #include -#include #include -#include #include #include @@ -300,301 +297,6 @@ amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) - -/* - * L3 cache descriptors - */ -static void amd_calc_l3_indices(struct amd_northbridge *nb) -{ - struct amd_l3_cache *l3 = &nb->l3_cache; - unsigned int sc0, sc1, sc2, sc3; - u32 val = 0; - - pci_read_config_dword(nb->misc, 0x1C4, &val); - - /* calculate subcache sizes */ - l3->subcaches[0] = sc0 = !(val & BIT(0)); - l3->subcaches[1] = sc1 = !(val & BIT(4)); - - if (boot_cpu_data.x86 == 0x15) { - l3->subcaches[0] = sc0 += !(val & BIT(1)); - l3->subcaches[1] = sc1 += !(val & BIT(5)); - } - - l3->subcaches[2] = sc2 = !(val & BIT(8)) + !(val & BIT(9)); - l3->subcaches[3] = sc3 = !(val & BIT(12)) + !(val & BIT(13)); - - l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; -} - -/* - * check whether a slot used for disabling an L3 index is occupied. - * @l3: L3 cache descriptor - * @slot: slot number (0..1) - * - * @returns: the disabled index if used or negative value if slot free. - */ -static int amd_get_l3_disable_slot(struct amd_northbridge *nb, unsigned slot) -{ - unsigned int reg = 0; - - pci_read_config_dword(nb->misc, 0x1BC + slot * 4, ®); - - /* check whether this slot is activated already */ - if (reg & (3UL << 30)) - return reg & 0xfff; - - return -1; -} - -static ssize_t show_cache_disable(struct cacheinfo *ci, char *buf, unsigned int slot) -{ - int index; - struct amd_northbridge *nb = ci->priv; - - index = amd_get_l3_disable_slot(nb, slot); - if (index >= 0) - return sprintf(buf, "%d\n", index); - - return sprintf(buf, "FREE\n"); -} - -#define SHOW_CACHE_DISABLE(slot) \ -static ssize_t \ -cache_disable_##slot##_show(struct device *dev, \ - struct device_attribute *attr, char *buf) \ -{ \ - struct cacheinfo *ci = dev_get_drvdata(dev); \ - return show_cache_disable(ci, buf, slot); \ -} -SHOW_CACHE_DISABLE(0) -SHOW_CACHE_DISABLE(1) - -static void amd_l3_disable_index(struct amd_northbridge *nb, int cpu, - unsigned slot, unsigned long idx) -{ - int i; - - idx |= BIT(30); - - /* - * disable index in all 4 subcaches - */ - for (i = 0; i < 4; i++) { - u32 reg = idx | (i << 20); - - if (!nb->l3_cache.subcaches[i]) - continue; - - pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); - - /* - * We need to WBINVD on a core on the node containing the L3 - * cache which indices we disable therefore a simple wbinvd() - * is not sufficient. - */ - wbinvd_on_cpu(cpu); - - reg |= BIT(31); - pci_write_config_dword(nb->misc, 0x1BC + slot * 4, reg); - } -} - -/* - * disable a L3 cache index by using a disable-slot - * - * @l3: L3 cache descriptor - * @cpu: A CPU on the node containing the L3 cache - * @slot: slot number (0..1) - * @index: index to disable - * - * @return: 0 on success, error status on failure - */ -static int amd_set_l3_disable_slot(struct amd_northbridge *nb, int cpu, - unsigned slot, unsigned long index) -{ - int ret = 0; - - /* check if @slot is already used or the index is already disabled */ - ret = amd_get_l3_disable_slot(nb, slot); - if (ret >= 0) - return -EEXIST; - - if (index > nb->l3_cache.indices) - return -EINVAL; - - /* check whether the other slot has disabled the same index already */ - if (index == amd_get_l3_disable_slot(nb, !slot)) - return -EEXIST; - - amd_l3_disable_index(nb, cpu, slot, index); - - return 0; -} - -static ssize_t store_cache_disable(struct cacheinfo *ci, const char *buf, - size_t count, unsigned int slot) -{ - unsigned long val = 0; - int cpu, err = 0; - struct amd_northbridge *nb = ci->priv; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - cpu = cpumask_first(&ci->shared_cpu_map); - - if (kstrtoul(buf, 10, &val) < 0) - return -EINVAL; - - err = amd_set_l3_disable_slot(nb, cpu, slot, val); - if (err) { - if (err == -EEXIST) - pr_warn("L3 slot %d in use/index already disabled!\n", - slot); - return err; - } - return count; -} - -#define STORE_CACHE_DISABLE(slot) \ -static ssize_t \ -cache_disable_##slot##_store(struct device *dev, \ - struct device_attribute *attr, \ - const char *buf, size_t count) \ -{ \ - struct cacheinfo *ci = dev_get_drvdata(dev); \ - return store_cache_disable(ci, buf, count, slot); \ -} -STORE_CACHE_DISABLE(0) -STORE_CACHE_DISABLE(1) - -static ssize_t subcaches_show(struct device *dev, - struct device_attribute *attr, char *buf) -{ - struct cacheinfo *ci = dev_get_drvdata(dev); - int cpu = cpumask_first(&ci->shared_cpu_map); - - return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); -} - -static ssize_t subcaches_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t count) -{ - struct cacheinfo *ci = dev_get_drvdata(dev); - int cpu = cpumask_first(&ci->shared_cpu_map); - unsigned long val; - - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; - - if (kstrtoul(buf, 16, &val) < 0) - return -EINVAL; - - if (amd_set_subcaches(cpu, val)) - return -EINVAL; - - return count; -} - -static DEVICE_ATTR_RW(cache_disable_0); -static DEVICE_ATTR_RW(cache_disable_1); -static DEVICE_ATTR_RW(subcaches); - -static umode_t -cache_private_attrs_is_visible(struct kobject *kobj, - struct attribute *attr, int unused) -{ - struct device *dev = kobj_to_dev(kobj); - struct cacheinfo *ci = dev_get_drvdata(dev); - umode_t mode = attr->mode; - - if (!ci->priv) - return 0; - - if ((attr == &dev_attr_subcaches.attr) && - amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - return mode; - - if ((attr == &dev_attr_cache_disable_0.attr || - attr == &dev_attr_cache_disable_1.attr) && - amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - return mode; - - return 0; -} - -static struct attribute_group cache_private_group = { - .is_visible = cache_private_attrs_is_visible, -}; - -static void init_amd_l3_attrs(void) -{ - int n = 1; - static struct attribute **amd_l3_attrs; - - if (amd_l3_attrs) /* already initialized */ - return; - - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) - n += 2; - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - n += 1; - - amd_l3_attrs = kcalloc(n, sizeof(*amd_l3_attrs), GFP_KERNEL); - if (!amd_l3_attrs) - return; - - n = 0; - if (amd_nb_has_feature(AMD_NB_L3_INDEX_DISABLE)) { - amd_l3_attrs[n++] = &dev_attr_cache_disable_0.attr; - amd_l3_attrs[n++] = &dev_attr_cache_disable_1.attr; - } - if (amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) - amd_l3_attrs[n++] = &dev_attr_subcaches.attr; - - cache_private_group.attrs = amd_l3_attrs; -} - -const struct attribute_group * -cache_get_priv_group(struct cacheinfo *ci) -{ - struct amd_northbridge *nb = ci->priv; - - if (ci->level < 3 || !nb) - return NULL; - - if (nb && nb->l3_cache.indices) - init_amd_l3_attrs(); - - return &cache_private_group; -} - -static struct amd_northbridge *amd_init_l3_cache(int index) -{ - struct amd_northbridge *nb; - int node; - - /* only for L3, and not in virtualized environments */ - if (index < 3) - return NULL; - - node = topology_amd_node_id(smp_processor_id()); - nb = node_to_amd_nb(node); - if (nb && !nb->l3_cache.indices) - amd_calc_l3_indices(nb); - - return nb; -} -#else -static struct amd_northbridge *amd_init_l3_cache(int index) -{ - return NULL; -} -#endif /* CONFIG_AMD_NB && CONFIG_SYSFS */ - /* * Fill passed _cpuid4_info_regs structure. * Intel-only code paths should pass NULL for the amd_northbridge diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h index 51deb60a9d26..bc38b2d56f26 100644 --- a/arch/x86/kernel/cpu/cpu.h +++ b/arch/x86/kernel/cpu/cpu.h @@ -75,6 +75,15 @@ extern void check_null_seg_clears_base(struct cpuinfo_x86 *c); void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id); void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c); +#if defined(CONFIG_AMD_NB) && defined(CONFIG_SYSFS) +struct amd_northbridge *amd_init_l3_cache(int index); +#else +static inline struct amd_northbridge *amd_init_l3_cache(int index) +{ + return NULL; +} +#endif + unsigned int aperfmperf_get_khz(int cpu); void cpu_select_mitigations(void); From 071f4ad6494aff76589ca30a2d13e74bc1e33e0f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:09 +0100 Subject: [PATCH 034/101] x86/cacheinfo: Use sysfs_emit() for sysfs attributes show() Per Documentation/filesystems/sysfs.rst, a sysfs attribute's show() method should only use sysfs_emit() or sysfs_emit_at() when returning values to user space. Use sysfs_emit() for the AMD L3 cache sysfs attributes cache_disable_0, cache_disable_1, and subcaches. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-15-darwi@linutronix.de --- arch/x86/kernel/cpu/amd_cache_disable.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/amd_cache_disable.c b/arch/x86/kernel/cpu/amd_cache_disable.c index 6d53aee0d869..d860ad3f8a5a 100644 --- a/arch/x86/kernel/cpu/amd_cache_disable.c +++ b/arch/x86/kernel/cpu/amd_cache_disable.c @@ -66,9 +66,9 @@ static ssize_t show_cache_disable(struct cacheinfo *ci, char *buf, unsigned int index = amd_get_l3_disable_slot(nb, slot); if (index >= 0) - return sprintf(buf, "%d\n", index); + return sysfs_emit(buf, "%d\n", index); - return sprintf(buf, "FREE\n"); + return sysfs_emit(buf, "FREE\n"); } #define SHOW_CACHE_DISABLE(slot) \ @@ -189,7 +189,7 @@ static ssize_t subcaches_show(struct device *dev, struct device_attribute *attr, struct cacheinfo *ci = dev_get_drvdata(dev); int cpu = cpumask_first(&ci->shared_cpu_map); - return sprintf(buf, "%x\n", amd_get_subcaches(cpu)); + return sysfs_emit(buf, "%x\n", amd_get_subcaches(cpu)); } static ssize_t subcaches_store(struct device *dev, From 2d56cc87225035919dfceb40e676081d833dab13 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:10 +0100 Subject: [PATCH 035/101] x86/cacheinfo: Separate Intel and AMD CPUID leaf 0x4 code paths The CPUID leaf 0x4 parsing code at cpuid4_cache_lookup_regs() is ugly and convoluted. It is tangled with multiple nested conditions to handle: * AMD with TOPEXT, or Hygon CPUs via leaf 0x8000001d * Legacy AMD fallback via leaf 0x4 emulation * Intel CPUs via the actual CPUID leaf 0x4 Moreover, AMD L3 northbridge initialization is also awkwardly placed alongside the CPUID calls of the first two scenarios above. Refactor all of that as follows: * Update AMD's leaf 0x4 emulation comment to represent current state * Clearly label the AMD leaf 0x4 emulation function as a fallback * Split AMD/Hygon and Intel code paths into separate functions * Move AMD L3 northbridge initialization out of CPUID leaf 0x4 code, and into populate_cache_leaves() where it belongs. There, ci_info_init() can directly store the initialized object in the private pointer of the API. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-16-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 95 +++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index ea6fba942d27..10a79d87ce86 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -167,12 +167,11 @@ struct _cpuid4_info_regs { unsigned long size; }; -/* AMD doesn't have CPUID4. Emulate it here to report the same - information to the user. This makes some assumptions about the machine: - L2 not shared, no SMT etc. that is currently true on AMD CPUs. +/* + * Fallback AMD CPUID(4) emulation + * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d) + */ - In theory the TLBs could be reported as fake type (they are in "dummy"). - Maybe later */ union l1_cache { struct { unsigned line_size:8; @@ -228,9 +227,8 @@ static const enum cache_type cache_type_map[] = { [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, }; -static void -amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, - union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx) +static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, + union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx) { unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb; union l1_cache l1i, l1d; @@ -297,36 +295,9 @@ amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -/* - * Fill passed _cpuid4_info_regs structure. - * Intel-only code paths should pass NULL for the amd_northbridge - * return pointer. - */ -static int cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *id4, - struct amd_northbridge **nb) +static int cpuid4_info_fill_done(struct _cpuid4_info_regs *id4, union _cpuid4_leaf_eax eax, + union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx) { - u8 cpu_vendor = boot_cpu_data.x86_vendor; - union _cpuid4_leaf_eax eax; - union _cpuid4_leaf_ebx ebx; - union _cpuid4_leaf_ecx ecx; - u32 edx; - - if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) { - if (boot_cpu_has(X86_FEATURE_TOPOEXT) || cpu_vendor == X86_VENDOR_HYGON) { - /* AMD with TOPOEXT, or HYGON */ - cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &edx); - } else { - /* Legacy AMD fallback */ - amd_cpuid4(index, &eax, &ebx, &ecx); - } - - if (nb) - *nb = amd_init_l3_cache(index); - } else { - /* Intel */ - cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); - } - if (eax.split.type == CTYPE_NULL) return -EIO; @@ -341,6 +312,42 @@ static int cpuid4_cache_lookup_regs(int index, struct _cpuid4_info_regs *id4, return 0; } +static int amd_fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) +{ + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + u32 ignored; + + if (boot_cpu_has(X86_FEATURE_TOPOEXT) || boot_cpu_data.x86_vendor == X86_VENDOR_HYGON) + cpuid_count(0x8000001d, index, &eax.full, &ebx.full, &ecx.full, &ignored); + else + legacy_amd_cpuid4(index, &eax, &ebx, &ecx); + + return cpuid4_info_fill_done(id4, eax, ebx, ecx); +} + +static int intel_fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) +{ + union _cpuid4_leaf_eax eax; + union _cpuid4_leaf_ebx ebx; + union _cpuid4_leaf_ecx ecx; + u32 ignored; + + cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &ignored); + + return cpuid4_info_fill_done(id4, eax, ebx, ecx); +} + +static int fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) +{ + u8 cpu_vendor = boot_cpu_data.x86_vendor; + + return (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) ? + amd_fill_cpuid4_info(index, id4) : + intel_fill_cpuid4_info(index, id4); +} + static int find_num_cache_leaves(struct cpuinfo_x86 *c) { unsigned int eax, ebx, ecx, edx, op; @@ -472,7 +479,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) struct _cpuid4_info_regs id4 = {}; int retval; - retval = cpuid4_cache_lookup_regs(i, &id4, NULL); + retval = intel_fill_cpuid4_info(i, &id4); if (retval < 0) continue; @@ -692,17 +699,23 @@ static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4) int populate_cache_leaves(unsigned int cpu) { - unsigned int idx, ret; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *ci = this_cpu_ci->info_list; + u8 cpu_vendor = boot_cpu_data.x86_vendor; struct _cpuid4_info_regs id4 = {}; - struct amd_northbridge *nb; + struct amd_northbridge *nb = NULL; + int idx, ret; for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { - ret = cpuid4_cache_lookup_regs(idx, &id4, &nb); + ret = fill_cpuid4_info(idx, &id4); if (ret) return ret; + get_cache_id(cpu, &id4); + + if (cpu_vendor == X86_VENDOR_AMD || cpu_vendor == X86_VENDOR_HYGON) + nb = amd_init_l3_cache(idx); + ci_info_init(ci++, &id4, nb); __cache_cpumap_setup(cpu, idx, &id4); } From eb1c7c08c5a8155d0d2d0a6c7fd741039b7287f1 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:11 +0100 Subject: [PATCH 036/101] x86/cacheinfo: Rename 'struct _cpuid4_info_regs' to 'struct _cpuid4_info' Parent commits decoupled amd_northbridge from _cpuid4_info_regs, moved AMD L3 northbridge cache_disable_0/1 sysfs code to its own file, and splitted AMD vs. Intel leaf 0x4 handling into: amd_fill_cpuid4_info() intel_fill_cpuid4_info() fill_cpuid4_info() After doing all that, the "_cpuid4_info_regs" name becomes a mouthful. It is also not totally accurate, as the structure holds cpuid4 derived information like cache node ID and size -- not just regs. Rename struct _cpuid4_info_regs to _cpuid4_info. That new name also better matches the AMD/Intel leaf 0x4 functions mentioned above. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-17-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 10a79d87ce86..2aaa0f830ec3 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -159,7 +159,7 @@ union _cpuid4_leaf_ecx { u32 full; }; -struct _cpuid4_info_regs { +struct _cpuid4_info { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; union _cpuid4_leaf_ecx ecx; @@ -295,7 +295,7 @@ static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, (ebx->split.ways_of_associativity + 1) - 1; } -static int cpuid4_info_fill_done(struct _cpuid4_info_regs *id4, union _cpuid4_leaf_eax eax, +static int cpuid4_info_fill_done(struct _cpuid4_info *id4, union _cpuid4_leaf_eax eax, union _cpuid4_leaf_ebx ebx, union _cpuid4_leaf_ecx ecx) { if (eax.split.type == CTYPE_NULL) @@ -312,7 +312,7 @@ static int cpuid4_info_fill_done(struct _cpuid4_info_regs *id4, union _cpuid4_le return 0; } -static int amd_fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) +static int amd_fill_cpuid4_info(int index, struct _cpuid4_info *id4) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -327,7 +327,7 @@ static int amd_fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) return cpuid4_info_fill_done(id4, eax, ebx, ecx); } -static int intel_fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) +static int intel_fill_cpuid4_info(int index, struct _cpuid4_info *id4) { union _cpuid4_leaf_eax eax; union _cpuid4_leaf_ebx ebx; @@ -339,7 +339,7 @@ static int intel_fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) return cpuid4_info_fill_done(id4, eax, ebx, ecx); } -static int fill_cpuid4_info(int index, struct _cpuid4_info_regs *id4) +static int fill_cpuid4_info(int index, struct _cpuid4_info *id4) { u8 cpu_vendor = boot_cpu_data.x86_vendor; @@ -476,7 +476,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) * parameters cpuid leaf to find the cache details */ for (i = 0; i < ci->num_leaves; i++) { - struct _cpuid4_info_regs id4 = {}; + struct _cpuid4_info id4 = {}; int retval; retval = intel_fill_cpuid4_info(i, &id4); @@ -563,7 +563,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) } static int __cache_amd_cpumap_setup(unsigned int cpu, int index, - const struct _cpuid4_info_regs *id4) + const struct _cpuid4_info *id4) { struct cpu_cacheinfo *this_cpu_ci; struct cacheinfo *ci; @@ -620,7 +620,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } static void __cache_cpumap_setup(unsigned int cpu, int index, - const struct _cpuid4_info_regs *id4) + const struct _cpuid4_info *id4) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *ci, *sibling_ci; @@ -655,7 +655,7 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, } } -static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info_regs *id4, +static void ci_info_init(struct cacheinfo *ci, const struct _cpuid4_info *id4, struct amd_northbridge *nb) { ci->id = id4->id; @@ -686,7 +686,7 @@ int init_cache_level(unsigned int cpu) * ECX as cache index. Then right shift apicid by the number's order to get * cache id for this cache node. */ -static void get_cache_id(int cpu, struct _cpuid4_info_regs *id4) +static void get_cache_id(int cpu, struct _cpuid4_info *id4) { struct cpuinfo_x86 *c = &cpu_data(cpu); unsigned long num_threads_sharing; @@ -702,8 +702,8 @@ int populate_cache_leaves(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); struct cacheinfo *ci = this_cpu_ci->info_list; u8 cpu_vendor = boot_cpu_data.x86_vendor; - struct _cpuid4_info_regs id4 = {}; struct amd_northbridge *nb = NULL; + struct _cpuid4_info id4 = {}; int idx, ret; for (idx = 0; idx < this_cpu_ci->num_leaves; idx++) { From 7596ab7a107bf70d571d75ad6ae078127f5ea11b Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:12 +0100 Subject: [PATCH 037/101] x86/cacheinfo: Clarify type markers for CPUID leaf 0x2 cache descriptors CPUID leaf 0x2 output is a stream of one-byte descriptors, each implying certain details about the CPU's cache and TLB entries. Two separate tables exist for interpreting these descriptors: one for TLBs at intel.c and one for caches at cacheinfo.c. These mapping tables will be merged in further commits, among other improvements to their model. In preparation for this, use more descriptive type names for the leaf 0x2 descriptors associated with cpu caches. Namely: LVL_1_INST => CACHE_L1_INST LVL_1_DATA => CACHE_L1_DATA LVL_2 => CACHE_L2 LVL_3 => CACHE_L3 After the TLB and cache descriptors mapping tables are merged, this will make it clear that such descriptors correspond to cpu caches. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-18-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 152 ++++++++++++++++---------------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 2aaa0f830ec3..626f55f960dc 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -23,10 +23,10 @@ #include "cpu.h" -#define LVL_1_INST 1 -#define LVL_1_DATA 2 -#define LVL_2 3 -#define LVL_3 4 +#define CACHE_L1_INST 1 +#define CACHE_L1_DATA 2 +#define CACHE_L2 3 +#define CACHE_L3 4 /* Shared last level cache maps */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); @@ -52,74 +52,74 @@ struct _cache_table { static const struct _cache_table cache_table[] = { - { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ - { 0x08, LVL_1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x09, LVL_1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ - { 0x0a, LVL_1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ - { 0x0c, LVL_1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x0d, LVL_1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ - { 0x0e, LVL_1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */ - { 0x21, LVL_2, 256 }, /* 8-way set assoc, 64 byte line size */ - { 0x22, LVL_3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x23, LVL_3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x25, LVL_3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x29, LVL_3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x2c, LVL_1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x30, LVL_1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x39, LVL_2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3a, LVL_2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3b, LVL_2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ - { 0x3c, LVL_2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3d, LVL_2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3e, LVL_2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3f, LVL_2, 256 }, /* 2-way set assoc, 64 byte line size */ - { 0x41, LVL_2, 128 }, /* 4-way set assoc, 32 byte line size */ - { 0x42, LVL_2, 256 }, /* 4-way set assoc, 32 byte line size */ - { 0x43, LVL_2, 512 }, /* 4-way set assoc, 32 byte line size */ - { 0x44, LVL_2, MB(1) }, /* 4-way set assoc, 32 byte line size */ - { 0x45, LVL_2, MB(2) }, /* 4-way set assoc, 32 byte line size */ - { 0x46, LVL_3, MB(4) }, /* 4-way set assoc, 64 byte line size */ - { 0x47, LVL_3, MB(8) }, /* 8-way set assoc, 64 byte line size */ - { 0x48, LVL_2, MB(3) }, /* 12-way set assoc, 64 byte line size */ - { 0x49, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0x4a, LVL_3, MB(6) }, /* 12-way set assoc, 64 byte line size */ - { 0x4b, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0x4c, LVL_3, MB(12) }, /* 12-way set assoc, 64 byte line size */ - { 0x4d, LVL_3, MB(16) }, /* 16-way set assoc, 64 byte line size */ - { 0x4e, LVL_2, MB(6) }, /* 24-way set assoc, 64 byte line size */ - { 0x60, LVL_1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x66, LVL_1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x67, LVL_1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x68, LVL_1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x78, LVL_2, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0x79, LVL_2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7a, LVL_2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7b, LVL_2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7c, LVL_2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7d, LVL_2, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0x7f, LVL_2, 512 }, /* 2-way set assoc, 64 byte line size */ - { 0x80, LVL_2, 512 }, /* 8-way set assoc, 64 byte line size */ - { 0x82, LVL_2, 256 }, /* 8-way set assoc, 32 byte line size */ - { 0x83, LVL_2, 512 }, /* 8-way set assoc, 32 byte line size */ - { 0x84, LVL_2, MB(1) }, /* 8-way set assoc, 32 byte line size */ - { 0x85, LVL_2, MB(2) }, /* 8-way set assoc, 32 byte line size */ - { 0x86, LVL_2, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0x87, LVL_2, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd0, LVL_3, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0xd1, LVL_3, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0xd2, LVL_3, MB(2) }, /* 4-way set assoc, 64 byte line size */ - { 0xd6, LVL_3, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd7, LVL_3, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0xd8, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xdc, LVL_3, MB(2) }, /* 12-way set assoc, 64 byte line size */ - { 0xdd, LVL_3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xde, LVL_3, MB(8) }, /* 12-way set assoc, 64 byte line size */ - { 0xe2, LVL_3, MB(2) }, /* 16-way set assoc, 64 byte line size */ - { 0xe3, LVL_3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0xe4, LVL_3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0xea, LVL_3, MB(12) }, /* 24-way set assoc, 64 byte line size */ - { 0xeb, LVL_3, MB(18) }, /* 24-way set assoc, 64 byte line size */ - { 0xec, LVL_3, MB(24) }, /* 24-way set assoc, 64 byte line size */ + { 0x06, CACHE_L1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ + { 0x08, CACHE_L1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x09, CACHE_L1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ + { 0x0a, CACHE_L1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ + { 0x0c, CACHE_L1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ + { 0x0d, CACHE_L1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ + { 0x0e, CACHE_L1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */ + { 0x21, CACHE_L2, 256 }, /* 8-way set assoc, 64 byte line size */ + { 0x22, CACHE_L3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x23, CACHE_L3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x25, CACHE_L3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x29, CACHE_L3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x2c, CACHE_L1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x30, CACHE_L1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ + { 0x39, CACHE_L2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3a, CACHE_L2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3b, CACHE_L2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ + { 0x3c, CACHE_L2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3d, CACHE_L2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ + { 0x3e, CACHE_L2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x3f, CACHE_L2, 256 }, /* 2-way set assoc, 64 byte line size */ + { 0x41, CACHE_L2, 128 }, /* 4-way set assoc, 32 byte line size */ + { 0x42, CACHE_L2, 256 }, /* 4-way set assoc, 32 byte line size */ + { 0x43, CACHE_L2, 512 }, /* 4-way set assoc, 32 byte line size */ + { 0x44, CACHE_L2, MB(1) }, /* 4-way set assoc, 32 byte line size */ + { 0x45, CACHE_L2, MB(2) }, /* 4-way set assoc, 32 byte line size */ + { 0x46, CACHE_L3, MB(4) }, /* 4-way set assoc, 64 byte line size */ + { 0x47, CACHE_L3, MB(8) }, /* 8-way set assoc, 64 byte line size */ + { 0x48, CACHE_L2, MB(3) }, /* 12-way set assoc, 64 byte line size */ + { 0x49, CACHE_L3, MB(4) }, /* 16-way set assoc, 64 byte line size */ + { 0x4a, CACHE_L3, MB(6) }, /* 12-way set assoc, 64 byte line size */ + { 0x4b, CACHE_L3, MB(8) }, /* 16-way set assoc, 64 byte line size */ + { 0x4c, CACHE_L3, MB(12) }, /* 12-way set assoc, 64 byte line size */ + { 0x4d, CACHE_L3, MB(16) }, /* 16-way set assoc, 64 byte line size */ + { 0x4e, CACHE_L2, MB(6) }, /* 24-way set assoc, 64 byte line size */ + { 0x60, CACHE_L1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x66, CACHE_L1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x67, CACHE_L1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x68, CACHE_L1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ + { 0x78, CACHE_L2, MB(1) }, /* 4-way set assoc, 64 byte line size */ + { 0x79, CACHE_L2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7a, CACHE_L2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7b, CACHE_L2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7c, CACHE_L2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ + { 0x7d, CACHE_L2, MB(2) }, /* 8-way set assoc, 64 byte line size */ + { 0x7f, CACHE_L2, 512 }, /* 2-way set assoc, 64 byte line size */ + { 0x80, CACHE_L2, 512 }, /* 8-way set assoc, 64 byte line size */ + { 0x82, CACHE_L2, 256 }, /* 8-way set assoc, 32 byte line size */ + { 0x83, CACHE_L2, 512 }, /* 8-way set assoc, 32 byte line size */ + { 0x84, CACHE_L2, MB(1) }, /* 8-way set assoc, 32 byte line size */ + { 0x85, CACHE_L2, MB(2) }, /* 8-way set assoc, 32 byte line size */ + { 0x86, CACHE_L2, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0x87, CACHE_L2, MB(1) }, /* 8-way set assoc, 64 byte line size */ + { 0xd0, CACHE_L3, 512 }, /* 4-way set assoc, 64 byte line size */ + { 0xd1, CACHE_L3, MB(1) }, /* 4-way set assoc, 64 byte line size */ + { 0xd2, CACHE_L3, MB(2) }, /* 4-way set assoc, 64 byte line size */ + { 0xd6, CACHE_L3, MB(1) }, /* 8-way set assoc, 64 byte line size */ + { 0xd7, CACHE_L3, MB(2) }, /* 8-way set assoc, 64 byte line size */ + { 0xd8, CACHE_L3, MB(4) }, /* 12-way set assoc, 64 byte line size */ + { 0xdc, CACHE_L3, MB(2) }, /* 12-way set assoc, 64 byte line size */ + { 0xdd, CACHE_L3, MB(4) }, /* 12-way set assoc, 64 byte line size */ + { 0xde, CACHE_L3, MB(8) }, /* 12-way set assoc, 64 byte line size */ + { 0xe2, CACHE_L3, MB(2) }, /* 16-way set assoc, 64 byte line size */ + { 0xe3, CACHE_L3, MB(4) }, /* 16-way set assoc, 64 byte line size */ + { 0xe4, CACHE_L3, MB(8) }, /* 16-way set assoc, 64 byte line size */ + { 0xea, CACHE_L3, MB(12) }, /* 24-way set assoc, 64 byte line size */ + { 0xeb, CACHE_L3, MB(18) }, /* 24-way set assoc, 64 byte line size */ + { 0xec, CACHE_L3, MB(24) }, /* 24-way set assoc, 64 byte line size */ }; @@ -521,10 +521,10 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) continue; switch (entry->cache_type) { - case LVL_1_INST: l1i += entry->size; break; - case LVL_1_DATA: l1d += entry->size; break; - case LVL_2: l2 += entry->size; break; - case LVL_3: l3 += entry->size; break; + case CACHE_L1_INST: l1i += entry->size; break; + case CACHE_L1_DATA: l1d += entry->size; break; + case CACHE_L2: l2 += entry->size; break; + case CACHE_L3: l3 += entry->size; break; } } } From e1e6b57146554a321d0ed1e76d2839ac24117f26 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:13 +0100 Subject: [PATCH 038/101] x86/cacheinfo: Use enums for cache descriptor types The leaf 0x2 one-byte cache descriptor types: CACHE_L1_INST CACHE_L1_DATA CACHE_L2 CACHE_L3 are just discriminators to be used within the cache_table[] mapping. Their specific values are irrelevant. Use enums for such types. Make the enum packed and static assert that its values remain within a single byte so that the cache_table[] array size do not go out of hand. Use a __CHECKER__ guard for the static_assert(sizeof(enum) == 1) line as sparse ignores the __packed annotation on enums. This is similar to: fe3944fb245a ("fs: Move enum rw_hint into a new header file") for the core SCSI code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/Z9rsTirs9lLfEPD9@lx-t490 Link: https://lore.kernel.org/r/20250324133324.23458-19-darwi@linutronix.de --- arch/x86/include/asm/cpuid/types.h | 15 +++++++++++++++ arch/x86/kernel/cpu/cacheinfo.c | 9 ++------- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h index 753f6c4514f4..39c3c79c081b 100644 --- a/arch/x86/include/asm/cpuid/types.h +++ b/arch/x86/include/asm/cpuid/types.h @@ -2,6 +2,7 @@ #ifndef _ASM_X86_CPUID_TYPES_H #define _ASM_X86_CPUID_TYPES_H +#include #include /* @@ -45,4 +46,18 @@ union leaf_0x2_regs { u8 desc[16]; }; +/* + * Leaf 0x2 1-byte descriptors' cache types + * To be used for their mappings at cache_table[] + */ +enum _cache_table_type { + CACHE_L1_INST, + CACHE_L1_DATA, + CACHE_L2, + CACHE_L3, +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _cache_table_type) == 1); +#endif + #endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 626f55f960dc..09c5aa9af973 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -23,11 +23,6 @@ #include "cpu.h" -#define CACHE_L1_INST 1 -#define CACHE_L1_DATA 2 -#define CACHE_L2 3 -#define CACHE_L3 4 - /* Shared last level cache maps */ DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map); @@ -41,7 +36,7 @@ unsigned int memory_caching_control __ro_after_init; struct _cache_table { u8 descriptor; - char cache_type; + enum _cache_table_type type; short size; }; @@ -520,7 +515,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) if (!entry) continue; - switch (entry->cache_type) { + switch (entry->type) { case CACHE_L1_INST: l1i += entry->size; break; case CACHE_L1_DATA: l1d += entry->size; break; case CACHE_L2: l2 += entry->size; break; From 543904cdfe1eb53ff4267f561a4d59cb1fe6ceb7 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:14 +0100 Subject: [PATCH 039/101] x86/cpu: Use enums for TLB descriptor types The leaf 0x2 one-byte TLB descriptor types: TLB_INST_4K TLB_INST_4M TLB_INST_2M_4M ... are just discriminators to be used within the intel_tlb_table[] mapping. Their specific values are irrelevant. Use enums for such types. Make the enum packed and static assert that its values remain within a single byte so that the intel_tlb_table[] size do not go out of hand. Use a __CHECKER__ guard for the static_assert(sizeof(enum) == 1) line as sparse ignores the __packed annotation on enums. This is similar to: fe3944fb245a ("fs: Move enum rw_hint into a new header file") for the core SCSI code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/Z9rsTirs9lLfEPD9@lx-t490 Link: https://lore.kernel.org/r/20250324133324.23458-20-darwi@linutronix.de --- arch/x86/include/asm/cpuid/types.h | 31 ++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/intel.c | 28 +++------------------------ 2 files changed, 34 insertions(+), 25 deletions(-) diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h index 39c3c79c081b..e756327f8e02 100644 --- a/arch/x86/include/asm/cpuid/types.h +++ b/arch/x86/include/asm/cpuid/types.h @@ -60,4 +60,35 @@ enum _cache_table_type { static_assert(sizeof(enum _cache_table_type) == 1); #endif +/* + * Leaf 0x2 1-byte descriptors' TLB types + * To be used for their mappings at intel_tlb_table[] + * + * Start at 1 since type 0 is reserved for HW byte descriptors which are + * not recognized by the kernel; i.e., those without an explicit mapping. + */ +enum _tlb_table_type { + TLB_INST_4K = 1, + TLB_INST_4M, + TLB_INST_2M_4M, + TLB_INST_ALL, + + TLB_DATA_4K, + TLB_DATA_4M, + TLB_DATA_2M_4M, + TLB_DATA_4K_4M, + TLB_DATA_1G, + TLB_DATA_1G_2M_4M, + + TLB_DATA0_4K, + TLB_DATA0_4M, + TLB_DATA0_2M_4M, + + STLB_4K, + STLB_4K_2M, +} __packed; +#ifndef __CHECKER__ +static_assert(sizeof(enum _tlb_table_type) == 1); +#endif + #endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index aeb7d6d48379..def433e0081f 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -626,28 +626,6 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -#define TLB_INST_4K 0x01 -#define TLB_INST_4M 0x02 -#define TLB_INST_2M_4M 0x03 - -#define TLB_INST_ALL 0x05 -#define TLB_INST_1G 0x06 - -#define TLB_DATA_4K 0x11 -#define TLB_DATA_4M 0x12 -#define TLB_DATA_2M_4M 0x13 -#define TLB_DATA_4K_4M 0x14 - -#define TLB_DATA_1G 0x16 -#define TLB_DATA_1G_2M_4M 0x17 - -#define TLB_DATA0_4K 0x21 -#define TLB_DATA0_4M 0x22 -#define TLB_DATA0_2M_4M 0x23 - -#define STLB_4K 0x41 -#define STLB_4K_2M 0x42 - /* * All of leaf 0x2's one-byte TLB descriptors implies the same number of * entries for their respective TLB types. The 0x63 descriptor is an @@ -660,7 +638,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) struct _tlb_table { unsigned char descriptor; - char tlb_type; + enum _tlb_table_type type; unsigned int entries; }; @@ -718,11 +696,11 @@ static void intel_tlb_lookup(const unsigned char desc) intel_tlb_table[k].descriptor != 0; k++) ; - if (intel_tlb_table[k].tlb_type == 0) + if (intel_tlb_table[k].type == 0) return; entries = intel_tlb_table[k].entries; - switch (intel_tlb_table[k].tlb_type) { + switch (intel_tlb_table[k].type) { case STLB_4K: tlb_lli_4k = max(tlb_lli_4k, entries); tlb_lld_4k = max(tlb_lld_4k, entries); From 37aedb806bc68ff23bd0d90dce1564bfb0dca911 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 24 Mar 2025 14:33:15 +0100 Subject: [PATCH 040/101] x86/cpu: Consolidate CPUID leaf 0x2 tables CPUID leaf 0x2 describes TLBs and caches. So there are two tables with the respective descriptor constants in intel.c and cacheinfo.c. The tables occupy almost 600 byte and require a loop based lookup for each variant. Combining them into one table occupies exactly 1k rodata and allows to get rid of the loop based lookup by just using the descriptor byte provided by CPUID leaf 0x2 as index into the table, which simplifies the code and reduces text size. The conversion of the intel.c and cacheinfo.c code is done separately. [ darwi: Actually define struct leaf_0x2_table. Tab-align all of cpuid_0x2_table[] mapping entries. Define needed SZ_* macros at instead (merged commit.) Use CACHE_L1_{INST,DATA} as names for L1 cache descriptor types. Set descriptor 0x63 type as TLB_DATA_1G_2M_4M and explain why. Use enums for cache and TLB descriptor types (parent commits.) Start enum types at 1 since type 0 is reserved for unknown descriptors. Ensure that cache and TLB enum type values do not intersect. Add leaf 0x2 table accessor for_each_leaf_0x2_entry() + documentation. ] Co-developed-by: Ahmed S. Darwish Signed-off-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-21-darwi@linutronix.de --- arch/x86/include/asm/cpuid/leaf_0x2_api.h | 33 +++++- arch/x86/include/asm/cpuid/types.h | 40 +++++-- arch/x86/kernel/cpu/Makefile | 2 +- arch/x86/kernel/cpu/cpuid_0x2_table.c | 128 ++++++++++++++++++++++ 4 files changed, 193 insertions(+), 10 deletions(-) create mode 100644 arch/x86/kernel/cpu/cpuid_0x2_table.c diff --git a/arch/x86/include/asm/cpuid/leaf_0x2_api.h b/arch/x86/include/asm/cpuid/leaf_0x2_api.h index 4c845fc96716..46ecb15e92d9 100644 --- a/arch/x86/include/asm/cpuid/leaf_0x2_api.h +++ b/arch/x86/include/asm/cpuid/leaf_0x2_api.h @@ -13,7 +13,8 @@ * invalid 1-byte descriptor returned by the hardware to zero (the NULL * cache/TLB descriptor) before returning it to the caller. * - * Use for_each_leaf_0x2_desc() to iterate over the returned output. + * Use for_each_leaf_0x2_entry() to iterate over the register output in + * parsed form. */ static inline void cpuid_get_leaf_0x2_regs(union leaf_0x2_regs *regs) { @@ -62,4 +63,34 @@ static inline void cpuid_get_leaf_0x2_regs(union leaf_0x2_regs *regs) #define for_each_leaf_0x2_desc(regs, desc) \ for (desc = &(regs).desc[1]; desc < &(regs).desc[16]; desc++) +/** + * for_each_leaf_0x2_entry() - Iterator for parsed leaf 0x2 descriptors + * @regs: Leaf 0x2 register output, returned by cpuid_get_leaf_0x2_regs() + * @__ptr: u8 pointer, for macro internal use only + * @entry: Pointer to parsed descriptor information at each iteration + * + * Loop over the 1-byte descriptors in the passed leaf 0x2 output registers + * @regs. Provide the parsed information for each descriptor through @entry. + * + * To handle cache-specific descriptors, switch on @entry->c_type. For TLB + * descriptors, switch on @entry->t_type. + * + * Example usage for cache descriptors:: + * + * const struct leaf_0x2_table *entry; + * union leaf_0x2_regs regs; + * u8 *ptr; + * + * cpuid_get_leaf_0x2_regs(®s); + * for_each_leaf_0x2_entry(regs, ptr, entry) { + * switch (entry->c_type) { + * ... + * } + * } + */ +#define for_each_leaf_0x2_entry(regs, __ptr, entry) \ + for (__ptr = &(regs).desc[1]; \ + __ptr < &(regs).desc[16] && (entry = &cpuid_0x2_table[*__ptr]); \ + __ptr++) + #endif /* _ASM_X86_CPUID_LEAF_0x2_API_H */ diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h index e756327f8e02..24f643f2a2ad 100644 --- a/arch/x86/include/asm/cpuid/types.h +++ b/arch/x86/include/asm/cpuid/types.h @@ -48,27 +48,34 @@ union leaf_0x2_regs { /* * Leaf 0x2 1-byte descriptors' cache types - * To be used for their mappings at cache_table[] + * To be used for their mappings at cpuid_0x2_table[] + * + * Start at 1 since type 0 is reserved for HW byte descriptors which are + * not recognized by the kernel; i.e., those without an explicit mapping. */ enum _cache_table_type { - CACHE_L1_INST, + CACHE_L1_INST = 1, CACHE_L1_DATA, CACHE_L2, - CACHE_L3, + CACHE_L3 + /* Adjust __TLB_TABLE_TYPE_BEGIN before adding more types */ } __packed; #ifndef __CHECKER__ static_assert(sizeof(enum _cache_table_type) == 1); #endif +/* + * Ensure that leaf 0x2 cache and TLB type values do not intersect, + * since they share the same type field at struct cpuid_0x2_table. + */ +#define __TLB_TABLE_TYPE_BEGIN (CACHE_L3 + 1) + /* * Leaf 0x2 1-byte descriptors' TLB types - * To be used for their mappings at intel_tlb_table[] - * - * Start at 1 since type 0 is reserved for HW byte descriptors which are - * not recognized by the kernel; i.e., those without an explicit mapping. + * To be used for their mappings at cpuid_0x2_table[] */ enum _tlb_table_type { - TLB_INST_4K = 1, + TLB_INST_4K = __TLB_TABLE_TYPE_BEGIN, TLB_INST_4M, TLB_INST_2M_4M, TLB_INST_ALL, @@ -91,4 +98,21 @@ enum _tlb_table_type { static_assert(sizeof(enum _tlb_table_type) == 1); #endif +/* + * Combined parsing table for leaf 0x2 cache and TLB descriptors. + */ + +struct leaf_0x2_table { + union { + enum _cache_table_type c_type; + enum _tlb_table_type t_type; + }; + union { + short c_size; + short entries; + }; +}; + +extern const struct leaf_0x2_table cpuid_0x2_table[256]; + #endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index 3a39396d422d..1e26179ff18c 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile @@ -24,7 +24,7 @@ obj-y += rdrand.o obj-y += match.o obj-y += bugs.o obj-y += aperfmperf.o -obj-y += cpuid-deps.o +obj-y += cpuid-deps.o cpuid_0x2_table.o obj-y += umwait.o obj-y += capflags.o powerflags.o diff --git a/arch/x86/kernel/cpu/cpuid_0x2_table.c b/arch/x86/kernel/cpu/cpuid_0x2_table.c new file mode 100644 index 000000000000..89bc8db5e9c6 --- /dev/null +++ b/arch/x86/kernel/cpu/cpuid_0x2_table.c @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +#include + +#include "cpu.h" + +#define CACHE_ENTRY(_desc, _type, _size) \ + [_desc] = { \ + .c_type = (_type), \ + .c_size = (_size) / SZ_1K, \ + } + +#define TLB_ENTRY(_desc, _type, _entries) \ + [_desc] = { \ + .t_type = (_type), \ + .entries = (_entries), \ + } + +const struct leaf_0x2_table cpuid_0x2_table[256] = { + CACHE_ENTRY(0x06, CACHE_L1_INST, SZ_8K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x08, CACHE_L1_INST, SZ_16K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x09, CACHE_L1_INST, SZ_32K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x0a, CACHE_L1_DATA, SZ_8K ), /* 2 way set assoc, 32 byte line size */ + CACHE_ENTRY(0x0c, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x0d, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x0e, CACHE_L1_DATA, SZ_24K ), /* 6-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x21, CACHE_L2, SZ_256K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x22, CACHE_L3, SZ_512K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x23, CACHE_L3, SZ_1M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x25, CACHE_L3, SZ_2M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x29, CACHE_L3, SZ_4M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x2c, CACHE_L1_DATA, SZ_32K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x30, CACHE_L1_INST, SZ_32K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x39, CACHE_L2, SZ_128K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3a, CACHE_L2, SZ_192K ), /* 6-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3b, CACHE_L2, SZ_128K ), /* 2-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3c, CACHE_L2, SZ_256K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3d, CACHE_L2, SZ_384K ), /* 6-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3e, CACHE_L2, SZ_512K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x3f, CACHE_L2, SZ_256K ), /* 2-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x41, CACHE_L2, SZ_128K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x42, CACHE_L2, SZ_256K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x43, CACHE_L2, SZ_512K ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x44, CACHE_L2, SZ_1M ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x45, CACHE_L2, SZ_2M ), /* 4-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x46, CACHE_L3, SZ_4M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x47, CACHE_L3, SZ_8M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x48, CACHE_L2, SZ_3M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x49, CACHE_L3, SZ_4M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4a, CACHE_L3, SZ_6M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4b, CACHE_L3, SZ_8M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4c, CACHE_L3, SZ_12M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4d, CACHE_L3, SZ_16M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x4e, CACHE_L2, SZ_6M ), /* 24-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x60, CACHE_L1_DATA, SZ_16K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x66, CACHE_L1_DATA, SZ_8K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x67, CACHE_L1_DATA, SZ_16K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x68, CACHE_L1_DATA, SZ_32K ), /* 4-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x78, CACHE_L2, SZ_1M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x79, CACHE_L2, SZ_128K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7a, CACHE_L2, SZ_256K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7b, CACHE_L2, SZ_512K ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7c, CACHE_L2, SZ_1M ), /* 8-way set assoc, sectored cache, 64 byte line size */ + CACHE_ENTRY(0x7d, CACHE_L2, SZ_2M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x7f, CACHE_L2, SZ_512K ), /* 2-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x80, CACHE_L2, SZ_512K ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x82, CACHE_L2, SZ_256K ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x83, CACHE_L2, SZ_512K ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x84, CACHE_L2, SZ_1M ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x85, CACHE_L2, SZ_2M ), /* 8-way set assoc, 32 byte line size */ + CACHE_ENTRY(0x86, CACHE_L2, SZ_512K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0x87, CACHE_L2, SZ_1M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd0, CACHE_L3, SZ_512K ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd1, CACHE_L3, SZ_1M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd2, CACHE_L3, SZ_2M ), /* 4-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd6, CACHE_L3, SZ_1M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd7, CACHE_L3, SZ_2M ), /* 8-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xd8, CACHE_L3, SZ_4M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xdc, CACHE_L3, SZ_2M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xdd, CACHE_L3, SZ_4M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xde, CACHE_L3, SZ_8M ), /* 12-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xe2, CACHE_L3, SZ_2M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xe3, CACHE_L3, SZ_4M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xe4, CACHE_L3, SZ_8M ), /* 16-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xea, CACHE_L3, SZ_12M ), /* 24-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xeb, CACHE_L3, SZ_18M ), /* 24-way set assoc, 64 byte line size */ + CACHE_ENTRY(0xec, CACHE_L3, SZ_24M ), /* 24-way set assoc, 64 byte line size */ + + TLB_ENTRY( 0x01, TLB_INST_4K, 32 ), /* TLB_INST 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0x02, TLB_INST_4M, 2 ), /* TLB_INST 4 MByte pages, full associative */ + TLB_ENTRY( 0x03, TLB_DATA_4K, 64 ), /* TLB_DATA 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0x04, TLB_DATA_4M, 8 ), /* TLB_DATA 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x05, TLB_DATA_4M, 32 ), /* TLB_DATA 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x0b, TLB_INST_4M, 4 ), /* TLB_INST 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x4f, TLB_INST_4K, 32 ), /* TLB_INST 4 KByte pages */ + TLB_ENTRY( 0x50, TLB_INST_ALL, 64 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + TLB_ENTRY( 0x51, TLB_INST_ALL, 128 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + TLB_ENTRY( 0x52, TLB_INST_ALL, 256 ), /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ + TLB_ENTRY( 0x55, TLB_INST_2M_4M, 7 ), /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + TLB_ENTRY( 0x56, TLB_DATA0_4M, 16 ), /* TLB_DATA0 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x57, TLB_DATA0_4K, 16 ), /* TLB_DATA0 4 KByte pages, 4-way associative */ + TLB_ENTRY( 0x59, TLB_DATA0_4K, 16 ), /* TLB_DATA0 4 KByte pages, fully associative */ + TLB_ENTRY( 0x5a, TLB_DATA0_2M_4M, 32 ), /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */ + TLB_ENTRY( 0x5b, TLB_DATA_4K_4M, 64 ), /* TLB_DATA 4 KByte and 4 MByte pages */ + TLB_ENTRY( 0x5c, TLB_DATA_4K_4M, 128 ), /* TLB_DATA 4 KByte and 4 MByte pages */ + TLB_ENTRY( 0x5d, TLB_DATA_4K_4M, 256 ), /* TLB_DATA 4 KByte and 4 MByte pages */ + TLB_ENTRY( 0x61, TLB_INST_4K, 48 ), /* TLB_INST 4 KByte pages, full associative */ + TLB_ENTRY( 0x63, TLB_DATA_1G_2M_4M, 4 ), /* TLB_DATA 1 GByte pages, 4-way set associative + * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */ + TLB_ENTRY( 0x6b, TLB_DATA_4K, 256 ), /* TLB_DATA 4 KByte pages, 8-way associative */ + TLB_ENTRY( 0x6c, TLB_DATA_2M_4M, 128 ), /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */ + TLB_ENTRY( 0x6d, TLB_DATA_1G, 16 ), /* TLB_DATA 1 GByte pages, fully associative */ + TLB_ENTRY( 0x76, TLB_INST_2M_4M, 8 ), /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ + TLB_ENTRY( 0xb0, TLB_INST_4K, 128 ), /* TLB_INST 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0xb1, TLB_INST_2M_4M, 4 ), /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */ + TLB_ENTRY( 0xb2, TLB_INST_4K, 64 ), /* TLB_INST 4KByte pages, 4-way set associative */ + TLB_ENTRY( 0xb3, TLB_DATA_4K, 128 ), /* TLB_DATA 4 KByte pages, 4-way set associative */ + TLB_ENTRY( 0xb4, TLB_DATA_4K, 256 ), /* TLB_DATA 4 KByte pages, 4-way associative */ + TLB_ENTRY( 0xb5, TLB_INST_4K, 64 ), /* TLB_INST 4 KByte pages, 8-way set associative */ + TLB_ENTRY( 0xb6, TLB_INST_4K, 128 ), /* TLB_INST 4 KByte pages, 8-way set associative */ + TLB_ENTRY( 0xba, TLB_DATA_4K, 64 ), /* TLB_DATA 4 KByte pages, 4-way associative */ + TLB_ENTRY( 0xc0, TLB_DATA_4K_4M, 8 ), /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */ + TLB_ENTRY( 0xc1, STLB_4K_2M, 1024 ), /* STLB 4 KByte and 2 MByte pages, 8-way associative */ + TLB_ENTRY( 0xc2, TLB_DATA_2M_4M, 16 ), /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */ + TLB_ENTRY( 0xca, STLB_4K, 512 ), /* STLB 4 KByte pages, 4-way associative */ +}; From da23a6259844b576d98ad5c633eb437d3a2d90d3 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:16 +0100 Subject: [PATCH 041/101] x86/cacheinfo: Use consolidated CPUID leaf 0x2 descriptor table CPUID leaf 0x2 output is a stream of one-byte descriptors, each implying certain details about the CPU's cache and TLB entries. At previous commits, the mapping tables for such descriptors were merged into one consolidated table. The mapping was also transformed into a hash lookup instead of a loop-based lookup for each descriptor. Use the new consolidated table and its hash-based lookup through the for_each_leaf_0x2_tlb_entry() accessor. Remove the old cache-specific mapping, cache_table[], as it is no longer used. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-22-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 114 +++----------------------------- 1 file changed, 8 insertions(+), 106 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 09c5aa9af973..e399bf2a701d 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -34,90 +34,6 @@ static cpumask_var_t cpu_cacheinfo_mask; /* Kernel controls MTRR and/or PAT MSRs. */ unsigned int memory_caching_control __ro_after_init; -struct _cache_table { - u8 descriptor; - enum _cache_table_type type; - short size; -}; - -#define MB(x) ((x) * 1024) - -/* All the cache descriptor types we care about (no TLB or - trace cache entries) */ - -static const struct _cache_table cache_table[] = -{ - { 0x06, CACHE_L1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ - { 0x08, CACHE_L1_INST, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x09, CACHE_L1_INST, 32 }, /* 4-way set assoc, 64 byte line size */ - { 0x0a, CACHE_L1_DATA, 8 }, /* 2 way set assoc, 32 byte line size */ - { 0x0c, CACHE_L1_DATA, 16 }, /* 4-way set assoc, 32 byte line size */ - { 0x0d, CACHE_L1_DATA, 16 }, /* 4-way set assoc, 64 byte line size */ - { 0x0e, CACHE_L1_DATA, 24 }, /* 6-way set assoc, 64 byte line size */ - { 0x21, CACHE_L2, 256 }, /* 8-way set assoc, 64 byte line size */ - { 0x22, CACHE_L3, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x23, CACHE_L3, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x25, CACHE_L3, MB(2) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x29, CACHE_L3, MB(4) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x2c, CACHE_L1_DATA, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x30, CACHE_L1_INST, 32 }, /* 8-way set assoc, 64 byte line size */ - { 0x39, CACHE_L2, 128 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3a, CACHE_L2, 192 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3b, CACHE_L2, 128 }, /* 2-way set assoc, sectored cache, 64 byte line size */ - { 0x3c, CACHE_L2, 256 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3d, CACHE_L2, 384 }, /* 6-way set assoc, sectored cache, 64 byte line size */ - { 0x3e, CACHE_L2, 512 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x3f, CACHE_L2, 256 }, /* 2-way set assoc, 64 byte line size */ - { 0x41, CACHE_L2, 128 }, /* 4-way set assoc, 32 byte line size */ - { 0x42, CACHE_L2, 256 }, /* 4-way set assoc, 32 byte line size */ - { 0x43, CACHE_L2, 512 }, /* 4-way set assoc, 32 byte line size */ - { 0x44, CACHE_L2, MB(1) }, /* 4-way set assoc, 32 byte line size */ - { 0x45, CACHE_L2, MB(2) }, /* 4-way set assoc, 32 byte line size */ - { 0x46, CACHE_L3, MB(4) }, /* 4-way set assoc, 64 byte line size */ - { 0x47, CACHE_L3, MB(8) }, /* 8-way set assoc, 64 byte line size */ - { 0x48, CACHE_L2, MB(3) }, /* 12-way set assoc, 64 byte line size */ - { 0x49, CACHE_L3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0x4a, CACHE_L3, MB(6) }, /* 12-way set assoc, 64 byte line size */ - { 0x4b, CACHE_L3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0x4c, CACHE_L3, MB(12) }, /* 12-way set assoc, 64 byte line size */ - { 0x4d, CACHE_L3, MB(16) }, /* 16-way set assoc, 64 byte line size */ - { 0x4e, CACHE_L2, MB(6) }, /* 24-way set assoc, 64 byte line size */ - { 0x60, CACHE_L1_DATA, 16 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x66, CACHE_L1_DATA, 8 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x67, CACHE_L1_DATA, 16 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x68, CACHE_L1_DATA, 32 }, /* 4-way set assoc, sectored cache, 64 byte line size */ - { 0x78, CACHE_L2, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0x79, CACHE_L2, 128 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7a, CACHE_L2, 256 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7b, CACHE_L2, 512 }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7c, CACHE_L2, MB(1) }, /* 8-way set assoc, sectored cache, 64 byte line size */ - { 0x7d, CACHE_L2, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0x7f, CACHE_L2, 512 }, /* 2-way set assoc, 64 byte line size */ - { 0x80, CACHE_L2, 512 }, /* 8-way set assoc, 64 byte line size */ - { 0x82, CACHE_L2, 256 }, /* 8-way set assoc, 32 byte line size */ - { 0x83, CACHE_L2, 512 }, /* 8-way set assoc, 32 byte line size */ - { 0x84, CACHE_L2, MB(1) }, /* 8-way set assoc, 32 byte line size */ - { 0x85, CACHE_L2, MB(2) }, /* 8-way set assoc, 32 byte line size */ - { 0x86, CACHE_L2, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0x87, CACHE_L2, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd0, CACHE_L3, 512 }, /* 4-way set assoc, 64 byte line size */ - { 0xd1, CACHE_L3, MB(1) }, /* 4-way set assoc, 64 byte line size */ - { 0xd2, CACHE_L3, MB(2) }, /* 4-way set assoc, 64 byte line size */ - { 0xd6, CACHE_L3, MB(1) }, /* 8-way set assoc, 64 byte line size */ - { 0xd7, CACHE_L3, MB(2) }, /* 8-way set assoc, 64 byte line size */ - { 0xd8, CACHE_L3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xdc, CACHE_L3, MB(2) }, /* 12-way set assoc, 64 byte line size */ - { 0xdd, CACHE_L3, MB(4) }, /* 12-way set assoc, 64 byte line size */ - { 0xde, CACHE_L3, MB(8) }, /* 12-way set assoc, 64 byte line size */ - { 0xe2, CACHE_L3, MB(2) }, /* 16-way set assoc, 64 byte line size */ - { 0xe3, CACHE_L3, MB(4) }, /* 16-way set assoc, 64 byte line size */ - { 0xe4, CACHE_L3, MB(8) }, /* 16-way set assoc, 64 byte line size */ - { 0xea, CACHE_L3, MB(12) }, /* 24-way set assoc, 64 byte line size */ - { 0xeb, CACHE_L3, MB(18) }, /* 24-way set assoc, 64 byte line size */ - { 0xec, CACHE_L3, MB(24) }, /* 24-way set assoc, 64 byte line size */ -}; - - enum _cache_type { CTYPE_NULL = 0, CTYPE_DATA = 1, @@ -439,16 +355,6 @@ void init_hygon_cacheinfo(struct cpuinfo_x86 *c) ci->num_leaves = find_num_cache_leaves(c); } -static const struct _cache_table *cache_table_get(u8 desc) -{ - for (int i = 0; i < ARRAY_SIZE(cache_table); i++) { - if (cache_table[i].descriptor == desc) - return &cache_table[i]; - } - - return NULL; -} - void init_intel_cacheinfo(struct cpuinfo_x86 *c) { /* Cache sizes */ @@ -505,21 +411,17 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) /* Don't use CPUID(2) if CPUID(4) is supported. */ if (!ci->num_leaves && c->cpuid_level > 1) { - const struct _cache_table *entry; + const struct leaf_0x2_table *entry; union leaf_0x2_regs regs; - u8 *desc; + u8 *ptr; cpuid_get_leaf_0x2_regs(®s); - for_each_leaf_0x2_desc(regs, desc) { - entry = cache_table_get(*desc); - if (!entry) - continue; - - switch (entry->type) { - case CACHE_L1_INST: l1i += entry->size; break; - case CACHE_L1_DATA: l1d += entry->size; break; - case CACHE_L2: l2 += entry->size; break; - case CACHE_L3: l3 += entry->size; break; + for_each_leaf_0x2_entry(regs, ptr, entry) { + switch (entry->c_type) { + case CACHE_L1_INST: l1i += entry->c_size; break; + case CACHE_L1_DATA: l1d += entry->c_size; break; + case CACHE_L2: l2 += entry->c_size; break; + case CACHE_L3: l3 += entry->c_size; break; } } } From 4772304ee651b952fd098ec80a8298af9905743f Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:17 +0100 Subject: [PATCH 042/101] x86/cpu: Use consolidated CPUID leaf 0x2 descriptor table CPUID leaf 0x2 output is a stream of one-byte descriptors, each implying certain details about the CPU's cache and TLB entries. At previous commits, the mapping tables for such descriptors were merged into one consolidated table. The mapping was also transformed into a hash lookup instead of a loop-based lookup for each descriptor. Use the new consolidated table and its hash-based lookup through the for_each_leaf_0x2_tlb_entry() accessor. Remove the TLB-specific mapping, intel_tlb_table[], as it is now no longer used. Remove the macro, for_each_leaf_0x2_desc(), since the converted code was its last user. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-23-darwi@linutronix.de --- arch/x86/include/asm/cpuid/types.h | 10 ++++ arch/x86/kernel/cpu/intel.c | 83 +++--------------------------- 2 files changed, 17 insertions(+), 76 deletions(-) diff --git a/arch/x86/include/asm/cpuid/types.h b/arch/x86/include/asm/cpuid/types.h index 24f643f2a2ad..c95fee66e148 100644 --- a/arch/x86/include/asm/cpuid/types.h +++ b/arch/x86/include/asm/cpuid/types.h @@ -115,4 +115,14 @@ struct leaf_0x2_table { extern const struct leaf_0x2_table cpuid_0x2_table[256]; +/* + * All of leaf 0x2's one-byte TLB descriptors implies the same number of entries + * for their respective TLB types. TLB descriptor 0x63 is an exception: it + * implies 4 dTLB entries for 1GB pages and 32 dTLB entries for 2MB or 4MB pages. + * + * Encode that descriptor's dTLB entry count for 2MB/4MB pages here, as the entry + * count for dTLB 1GB pages is already encoded at the cpuid_0x2_table[]'s mapping. + */ +#define TLB_0x63_2M_4M_ENTRIES 32 + #endif /* _ASM_X86_CPUID_TYPES_H */ diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index def433e0081f..e5d814703406 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -626,81 +626,11 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) } #endif -/* - * All of leaf 0x2's one-byte TLB descriptors implies the same number of - * entries for their respective TLB types. The 0x63 descriptor is an - * exception: it implies 4 dTLB entries for 1GB pages 32 dTLB entries - * for 2MB or 4MB pages. Encode descriptor 0x63 dTLB entry count for - * 2MB/4MB pages here, as its count for dTLB 1GB pages is already at the - * intel_tlb_table[] mapping. - */ -#define TLB_0x63_2M_4M_ENTRIES 32 - -struct _tlb_table { - unsigned char descriptor; - enum _tlb_table_type type; - unsigned int entries; -}; - -static const struct _tlb_table intel_tlb_table[] = { - { 0x01, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages, 4-way set associative */ - { 0x02, TLB_INST_4M, 2}, /* TLB_INST 4 MByte pages, full associative */ - { 0x03, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way set associative */ - { 0x04, TLB_DATA_4M, 8}, /* TLB_DATA 4 MByte pages, 4-way set associative */ - { 0x05, TLB_DATA_4M, 32}, /* TLB_DATA 4 MByte pages, 4-way set associative */ - { 0x0b, TLB_INST_4M, 4}, /* TLB_INST 4 MByte pages, 4-way set associative */ - { 0x4f, TLB_INST_4K, 32}, /* TLB_INST 4 KByte pages */ - { 0x50, TLB_INST_ALL, 64}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ - { 0x51, TLB_INST_ALL, 128}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ - { 0x52, TLB_INST_ALL, 256}, /* TLB_INST 4 KByte and 2-MByte or 4-MByte pages */ - { 0x55, TLB_INST_2M_4M, 7}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ - { 0x56, TLB_DATA0_4M, 16}, /* TLB_DATA0 4 MByte pages, 4-way set associative */ - { 0x57, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, 4-way associative */ - { 0x59, TLB_DATA0_4K, 16}, /* TLB_DATA0 4 KByte pages, fully associative */ - { 0x5a, TLB_DATA0_2M_4M, 32}, /* TLB_DATA0 2-MByte or 4 MByte pages, 4-way set associative */ - { 0x5b, TLB_DATA_4K_4M, 64}, /* TLB_DATA 4 KByte and 4 MByte pages */ - { 0x5c, TLB_DATA_4K_4M, 128}, /* TLB_DATA 4 KByte and 4 MByte pages */ - { 0x5d, TLB_DATA_4K_4M, 256}, /* TLB_DATA 4 KByte and 4 MByte pages */ - { 0x61, TLB_INST_4K, 48}, /* TLB_INST 4 KByte pages, full associative */ - { 0x63, TLB_DATA_1G_2M_4M, 4}, /* TLB_DATA 1 GByte pages, 4-way set associative - * (plus 32 entries TLB_DATA 2 MByte or 4 MByte pages, not encoded here) */ - { 0x6b, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 8-way associative */ - { 0x6c, TLB_DATA_2M_4M, 128}, /* TLB_DATA 2 MByte or 4 MByte pages, 8-way associative */ - { 0x6d, TLB_DATA_1G, 16}, /* TLB_DATA 1 GByte pages, fully associative */ - { 0x76, TLB_INST_2M_4M, 8}, /* TLB_INST 2-MByte or 4-MByte pages, fully associative */ - { 0xb0, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 4-way set associative */ - { 0xb1, TLB_INST_2M_4M, 4}, /* TLB_INST 2M pages, 4-way, 8 entries or 4M pages, 4-way entries */ - { 0xb2, TLB_INST_4K, 64}, /* TLB_INST 4KByte pages, 4-way set associative */ - { 0xb3, TLB_DATA_4K, 128}, /* TLB_DATA 4 KByte pages, 4-way set associative */ - { 0xb4, TLB_DATA_4K, 256}, /* TLB_DATA 4 KByte pages, 4-way associative */ - { 0xb5, TLB_INST_4K, 64}, /* TLB_INST 4 KByte pages, 8-way set associative */ - { 0xb6, TLB_INST_4K, 128}, /* TLB_INST 4 KByte pages, 8-way set associative */ - { 0xba, TLB_DATA_4K, 64}, /* TLB_DATA 4 KByte pages, 4-way associative */ - { 0xc0, TLB_DATA_4K_4M, 8}, /* TLB_DATA 4 KByte and 4 MByte pages, 4-way associative */ - { 0xc1, STLB_4K_2M, 1024}, /* STLB 4 KByte and 2 MByte pages, 8-way associative */ - { 0xc2, TLB_DATA_2M_4M, 16}, /* TLB_DATA 2 MByte/4MByte pages, 4-way associative */ - { 0xca, STLB_4K, 512}, /* STLB 4 KByte pages, 4-way associative */ - { 0x00, 0, 0 } -}; - -static void intel_tlb_lookup(const unsigned char desc) +static void intel_tlb_lookup(const struct leaf_0x2_table *entry) { - unsigned int entries; - unsigned char k; + short entries = entry->entries; - if (desc == 0) - return; - - /* look up this descriptor in the table */ - for (k = 0; intel_tlb_table[k].descriptor != desc && - intel_tlb_table[k].descriptor != 0; k++) - ; - - if (intel_tlb_table[k].type == 0) - return; - - entries = intel_tlb_table[k].entries; - switch (intel_tlb_table[k].type) { + switch (entry->t_type) { case STLB_4K: tlb_lli_4k = max(tlb_lli_4k, entries); tlb_lld_4k = max(tlb_lld_4k, entries); @@ -757,15 +687,16 @@ static void intel_tlb_lookup(const unsigned char desc) static void intel_detect_tlb(struct cpuinfo_x86 *c) { + const struct leaf_0x2_table *entry; union leaf_0x2_regs regs; - u8 *desc; + u8 *ptr; if (c->cpuid_level < 2) return; cpuid_get_leaf_0x2_regs(®s); - for_each_leaf_0x2_desc(regs, desc) - intel_tlb_lookup(*desc); + for_each_leaf_0x2_entry(regs, ptr, entry) + intel_tlb_lookup(entry); } static const struct cpu_dev intel_cpu_dev = { From 5adfd367589cf9bb984aabfab74107bcf4402fde Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:18 +0100 Subject: [PATCH 043/101] x86/cacheinfo: Separate CPUID leaf 0x2 handling and post-processing logic The logic of init_intel_cacheinfo() is quite convoluted: it mixes leaf 0x4 parsing, leaf 0x2 parsing, plus some post-processing, in a single place. Begin simplifying its logic by extracting the leaf 0x2 parsing code, and the post-processing logic, into their own functions. While at it, rework the SMT LLC topology ID comment for clarity. Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-24-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 106 +++++++++++++++++--------------- 1 file changed, 58 insertions(+), 48 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e399bf2a701d..b39aad1ecf9c 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -355,14 +355,56 @@ void init_hygon_cacheinfo(struct cpuinfo_x86 *c) ci->num_leaves = find_num_cache_leaves(c); } +static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3, + unsigned int l2, unsigned int l1i, unsigned int l1d) +{ + /* + * If llc_id is still unset, then cpuid_level < 4, which implies + * that the only possibility left is SMT. Since CPUID(2) doesn't + * specify any shared caches and SMT shares all caches, we can + * unconditionally set LLC ID to the package ID so that all + * threads share it. + */ + if (c->topo.llc_id == BAD_APICID) + c->topo.llc_id = c->topo.pkg_id; + + c->x86_cache_size = l3 ? l3 : (l2 ? l2 : l1i + l1d); + + if (!l2) + cpu_detect_cache_sizes(c); +} + +/* + * Legacy Intel CPUID(2) path if CPUID(4) is not available. + */ +static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c) +{ + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; + const struct leaf_0x2_table *entry; + union leaf_0x2_regs regs; + u8 *ptr; + + if (c->cpuid_level < 2) + return; + + cpuid_get_leaf_0x2_regs(®s); + for_each_leaf_0x2_entry(regs, ptr, entry) { + switch (entry->c_type) { + case CACHE_L1_INST: l1i += entry->c_size; break; + case CACHE_L1_DATA: l1d += entry->c_size; break; + case CACHE_L2: l2 += entry->c_size; break; + case CACHE_L3: l3 += entry->c_size; break; + } + } + + intel_cacheinfo_done(c, l3, l2, l1i, l1d); +} + void init_intel_cacheinfo(struct cpuinfo_x86 *c) { - /* Cache sizes */ - unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; - unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ - unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ - unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); + unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; + unsigned int l2_id = 0, l3_id = 0; if (c->cpuid_level > 3) { /* @@ -376,7 +418,8 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) * Whenever possible use cpuid(4), deterministic cache * parameters cpuid leaf to find the cache details */ - for (i = 0; i < ci->num_leaves; i++) { + for (int i = 0; i < ci->num_leaves; i++) { + unsigned int num_threads_sharing, index_msb; struct _cpuid4_info id4 = {}; int retval; @@ -387,18 +430,18 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) switch (id4.eax.split.level) { case 1: if (id4.eax.split.type == CTYPE_DATA) - new_l1d = id4.size/1024; + l1d = id4.size / 1024; else if (id4.eax.split.type == CTYPE_INST) - new_l1i = id4.size/1024; + l1i = id4.size / 1024; break; case 2: - new_l2 = id4.size/1024; + l2 = id4.size / 1024; num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); l2_id = c->topo.apicid & ~((1 << index_msb) - 1); break; case 3: - new_l3 = id4.size/1024; + l3 = id4.size / 1024; num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; index_msb = get_count_order(num_threads_sharing); l3_id = c->topo.apicid & ~((1 << index_msb) - 1); @@ -411,52 +454,19 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) /* Don't use CPUID(2) if CPUID(4) is supported. */ if (!ci->num_leaves && c->cpuid_level > 1) { - const struct leaf_0x2_table *entry; - union leaf_0x2_regs regs; - u8 *ptr; - - cpuid_get_leaf_0x2_regs(®s); - for_each_leaf_0x2_entry(regs, ptr, entry) { - switch (entry->c_type) { - case CACHE_L1_INST: l1i += entry->c_size; break; - case CACHE_L1_DATA: l1d += entry->c_size; break; - case CACHE_L2: l2 += entry->c_size; break; - case CACHE_L3: l3 += entry->c_size; break; - } - } + intel_cacheinfo_0x2(c); + return; } - if (new_l1d) - l1d = new_l1d; - - if (new_l1i) - l1i = new_l1i; - - if (new_l2) { - l2 = new_l2; + if (l2) { c->topo.llc_id = l2_id; c->topo.l2c_id = l2_id; } - if (new_l3) { - l3 = new_l3; + if (l3) c->topo.llc_id = l3_id; - } - /* - * If llc_id is not yet set, this means cpuid_level < 4 which in - * turns means that the only possibility is SMT (as indicated in - * cpuid1). Since cpuid2 doesn't specify shared caches, and we know - * that SMT shares all caches, we can unconditionally set cpu_llc_id to - * c->topo.pkg_id. - */ - if (c->topo.llc_id == BAD_APICID) - c->topo.llc_id = c->topo.pkg_id; - - c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d)); - - if (!l2) - cpu_detect_cache_sizes(c); + intel_cacheinfo_done(c, l3, l2, l1i, l1d); } static int __cache_amd_cpumap_setup(unsigned int cpu, int index, From 66122616e212f1b9fee7d03582a5fdab2e8ed0e4 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:19 +0100 Subject: [PATCH 044/101] x86/cacheinfo: Separate Intel CPUID leaf 0x4 handling init_intel_cacheinfo() was overly complex. It parsed leaf 0x4 data, leaf 0x2 data, and performed post-processing, all within one function. Parent commit moved leaf 0x2 parsing and the post-processing logic into their own functions. Continue the refactoring by extracting leaf 0x4 parsing into its own function. Initialize local L2/L3 topology ID variables to BAD_APICID by default, thus ensuring they can be used unconditionally. Suggested-by: Thomas Gleixner Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-25-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 112 ++++++++++++++++---------------- 1 file changed, 55 insertions(+), 57 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index b39aad1ecf9c..72cc32d22c4d 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -400,73 +400,71 @@ static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c) intel_cacheinfo_done(c, l3, l2, l1i, l1d); } -void init_intel_cacheinfo(struct cpuinfo_x86 *c) +static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); - unsigned int l1i = 0, l1d = 0, l2 = 0, l3 = 0; - unsigned int l2_id = 0, l3_id = 0; + unsigned int l2_id = BAD_APICID, l3_id = BAD_APICID; + unsigned int l1d = 0, l1i = 0, l2 = 0, l3 = 0; - if (c->cpuid_level > 3) { - /* - * There should be at least one leaf. A non-zero value means - * that the number of leaves has been initialized. - */ - if (!ci->num_leaves) - ci->num_leaves = find_num_cache_leaves(c); + if (c->cpuid_level < 4) + return false; - /* - * Whenever possible use cpuid(4), deterministic cache - * parameters cpuid leaf to find the cache details - */ - for (int i = 0; i < ci->num_leaves; i++) { - unsigned int num_threads_sharing, index_msb; - struct _cpuid4_info id4 = {}; - int retval; + /* + * There should be at least one leaf. A non-zero value means + * that the number of leaves has been previously initialized. + */ + if (!ci->num_leaves) + ci->num_leaves = find_num_cache_leaves(c); - retval = intel_fill_cpuid4_info(i, &id4); - if (retval < 0) - continue; + if (!ci->num_leaves) + return false; - switch (id4.eax.split.level) { - case 1: - if (id4.eax.split.type == CTYPE_DATA) - l1d = id4.size / 1024; - else if (id4.eax.split.type == CTYPE_INST) - l1i = id4.size / 1024; - break; - case 2: - l2 = id4.size / 1024; - num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l2_id = c->topo.apicid & ~((1 << index_msb) - 1); - break; - case 3: - l3 = id4.size / 1024; - num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l3_id = c->topo.apicid & ~((1 << index_msb) - 1); - break; - default: - break; - } + for (int i = 0; i < ci->num_leaves; i++) { + unsigned int num_threads_sharing, index_msb; + struct _cpuid4_info id4 = {}; + int ret; + + ret = intel_fill_cpuid4_info(i, &id4); + if (ret < 0) + continue; + + switch (id4.eax.split.level) { + case 1: + if (id4.eax.split.type == CTYPE_DATA) + l1d = id4.size / 1024; + else if (id4.eax.split.type == CTYPE_INST) + l1i = id4.size / 1024; + break; + case 2: + l2 = id4.size / 1024; + num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l2_id = c->topo.apicid & ~((1 << index_msb) - 1); + break; + case 3: + l3 = id4.size / 1024; + num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + l3_id = c->topo.apicid & ~((1 << index_msb) - 1); + break; + default: + break; } } - /* Don't use CPUID(2) if CPUID(4) is supported. */ - if (!ci->num_leaves && c->cpuid_level > 1) { - intel_cacheinfo_0x2(c); - return; - } - - if (l2) { - c->topo.llc_id = l2_id; - c->topo.l2c_id = l2_id; - } - - if (l3) - c->topo.llc_id = l3_id; - + c->topo.l2c_id = l2_id; + c->topo.llc_id = (l3_id == BAD_APICID) ? l2_id : l3_id; intel_cacheinfo_done(c, l3, l2, l1i, l1d); + return true; +} + +void init_intel_cacheinfo(struct cpuinfo_x86 *c) +{ + /* Don't use CPUID(2) if CPUID(4) is supported. */ + if (intel_cacheinfo_0x4(c)) + return; + + intel_cacheinfo_0x2(c); } static int __cache_amd_cpumap_setup(unsigned int cpu, int index, From fda5f817ae41035db9e36854600525145688b35e Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:20 +0100 Subject: [PATCH 045/101] x86/cacheinfo: Extract out cache level topology ID calculation For Intel CPUID leaf 0x4 parsing, refactor the cache level topology ID calculation code into its own method instead of repeating the same logic twice for L2 and L3. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-26-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 72cc32d22c4d..7b274da7c725 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -400,6 +400,16 @@ static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c) intel_cacheinfo_done(c, l3, l2, l1i, l1d); } +static unsigned int calc_cache_topo_id(struct cpuinfo_x86 *c, const struct _cpuid4_info *id4) +{ + unsigned int num_threads_sharing; + int index_msb; + + num_threads_sharing = 1 + id4->eax.split.num_threads_sharing; + index_msb = get_count_order(num_threads_sharing); + return c->topo.apicid & ~((1 << index_msb) - 1); +} + static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); @@ -420,7 +430,6 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c) return false; for (int i = 0; i < ci->num_leaves; i++) { - unsigned int num_threads_sharing, index_msb; struct _cpuid4_info id4 = {}; int ret; @@ -437,15 +446,11 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c) break; case 2: l2 = id4.size / 1024; - num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l2_id = c->topo.apicid & ~((1 << index_msb) - 1); + l2_id = calc_cache_topo_id(c, &id4); break; case 3: l3 = id4.size / 1024; - num_threads_sharing = 1 + id4.eax.split.num_threads_sharing; - index_msb = get_count_order(num_threads_sharing); - l3_id = c->topo.apicid & ~((1 << index_msb) - 1); + l3_id = calc_cache_topo_id(c, &id4); break; default: break; From 05d48035e5f69cfe3c125b2bac47e4003b4acccf Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:21 +0100 Subject: [PATCH 046/101] x86/cacheinfo: Extract out cache self-snoop checks The logic of not doing a cache flush if the CPU declares cache self snooping support is repeated across the x86/cacheinfo code. Extract it into its own function. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-27-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 7b274da7c725..231470cdc1da 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -646,6 +646,17 @@ int populate_cache_leaves(unsigned int cpu) static unsigned long saved_cr4; static DEFINE_RAW_SPINLOCK(cache_disable_lock); +/* + * Cache flushing is the most time-consuming step when programming the + * MTRRs. On many Intel CPUs without known erratas, it can be skipped + * if the CPU declares cache self-snooping support. + */ +static void maybe_flush_caches(void) +{ + if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) + wbinvd(); +} + void cache_disable(void) __acquires(cache_disable_lock) { unsigned long cr0; @@ -663,14 +674,7 @@ void cache_disable(void) __acquires(cache_disable_lock) cr0 = read_cr0() | X86_CR0_CD; write_cr0(cr0); - /* - * Cache flushing is the most time-consuming step when programming - * the MTRRs. Fortunately, as per the Intel Software Development - * Manual, we can skip it if the processor supports cache self- - * snooping. - */ - if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) - wbinvd(); + maybe_flush_caches(); /* Save value of CR4 and clear Page Global Enable (bit 7) */ if (cpu_feature_enabled(X86_FEATURE_PGE)) { @@ -685,9 +689,7 @@ void cache_disable(void) __acquires(cache_disable_lock) if (cpu_feature_enabled(X86_FEATURE_MTRR)) mtrr_disable(); - /* Again, only flush caches if we have to. */ - if (!static_cpu_has(X86_FEATURE_SELFSNOOP)) - wbinvd(); + maybe_flush_caches(); } void cache_enable(void) __releases(cache_disable_lock) From eeeebc4fc642f492522e4be92dcaf22119123581 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:22 +0100 Subject: [PATCH 047/101] x86/cacheinfo: Relocate CPUID leaf 0x4 cache_type mapping The cache_type_map[] array is used to map Intel leaf 0x4 cache_type values to their corresponding types at . Move that array's definition after the actual CPUID leaf 0x4 structures, instead of having it in the middle of AMD leaf 0x4 emulation code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-28-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 231470cdc1da..e0d531e27ff3 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -78,6 +78,14 @@ struct _cpuid4_info { unsigned long size; }; +/* Map CPUID(4) EAX.cache_type to linux/cacheinfo.h types */ +static const enum cache_type cache_type_map[] = { + [CTYPE_NULL] = CACHE_TYPE_NOCACHE, + [CTYPE_DATA] = CACHE_TYPE_DATA, + [CTYPE_INST] = CACHE_TYPE_INST, + [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, +}; + /* * Fallback AMD CPUID(4) emulation * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d) @@ -131,13 +139,6 @@ static const unsigned short assocs[] = { static const unsigned char levels[] = { 1, 1, 2, 3 }; static const unsigned char types[] = { 1, 2, 3, 3 }; -static const enum cache_type cache_type_map[] = { - [CTYPE_NULL] = CACHE_TYPE_NOCACHE, - [CTYPE_DATA] = CACHE_TYPE_DATA, - [CTYPE_INST] = CACHE_TYPE_INST, - [CTYPE_UNIFIED] = CACHE_TYPE_UNIFIED, -}; - static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx) { From 6c963c42fc19d7c9ae9582ab75c3476d1752d979 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:23 +0100 Subject: [PATCH 048/101] x86/cacheinfo: Introduce cpuid_amd_hygon_has_l3_cache() Multiple code paths at cacheinfo.c and amd_nb.c check for AMD/Hygon CPUs L3 cache presensce by directly checking leaf 0x80000006 EDX output. Extract that logic into its own function. While at it, rework the AMD/Hygon LLC topology ID caclculation comments for clarity. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-29-darwi@linutronix.de --- arch/x86/include/asm/cpuid/api.h | 9 +++++++++ arch/x86/kernel/amd_nb.c | 7 +++---- arch/x86/kernel/cpu/cacheinfo.c | 32 ++++++++++++++------------------ 3 files changed, 26 insertions(+), 22 deletions(-) diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h index 9c180c9cc58e..bf75c6267311 100644 --- a/arch/x86/include/asm/cpuid/api.h +++ b/arch/x86/include/asm/cpuid/api.h @@ -207,4 +207,13 @@ static inline u32 hypervisor_cpuid_base(const char *sig, u32 leaves) return 0; } +/* + * CPUID(0x80000006) parsing helpers + */ + +static inline bool cpuid_amd_hygon_has_l3_cache(void) +{ + return cpuid_edx(0x80000006); +} + #endif /* _ASM_X86_CPUID_API_H */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 6d12a9b69432..5a8cc48f80be 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -13,7 +13,9 @@ #include #include #include + #include +#include static u32 *flush_words; @@ -91,10 +93,7 @@ static int amd_cache_northbridges(void) if (amd_gart_present()) amd_northbridges.flags |= AMD_NB_GART; - /* - * Check for L3 cache presence. - */ - if (!cpuid_edx(0x80000006)) + if (!cpuid_amd_hygon_has_l3_cache()) return 0; /* diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index e0d531e27ff3..71587570705f 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -281,29 +281,29 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c) return i; } +/* + * AMD/Hygon CPUs may have multiple LLCs if L3 caches exist. + */ + void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id) { - /* - * We may have multiple LLCs if L3 caches exist, so check if we - * have an L3 cache by looking at the L3 cache CPUID leaf. - */ - if (!cpuid_edx(0x80000006)) + if (!cpuid_amd_hygon_has_l3_cache()) return; if (c->x86 < 0x17) { - /* LLC is at the node level. */ + /* Pre-Zen: LLC is at the node level */ c->topo.llc_id = die_id; } else if (c->x86 == 0x17 && c->x86_model <= 0x1F) { /* - * LLC is at the core complex level. - * Core complex ID is ApicId[3] for these processors. + * Family 17h up to 1F models: LLC is at the core + * complex level. Core complex ID is ApicId[3]. */ c->topo.llc_id = c->topo.apicid >> 3; } else { /* - * LLC ID is calculated from the number of threads sharing the - * cache. - * */ + * Newer families: LLC ID is calculated from the number + * of threads sharing the L3 cache. + */ u32 eax, ebx, ecx, edx, num_sharing_cache = 0; u32 llc_index = find_num_cache_leaves(c) - 1; @@ -321,16 +321,12 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id) void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c) { - /* - * We may have multiple LLCs if L3 caches exist, so check if we - * have an L3 cache by looking at the L3 cache CPUID leaf. - */ - if (!cpuid_edx(0x80000006)) + if (!cpuid_amd_hygon_has_l3_cache()) return; /* - * LLC is at the core complex level. - * Core complex ID is ApicId[3] for these processors. + * Hygons are similar to AMD Family 17h up to 1F models: LLC is + * at the core complex level. Core complex ID is ApicId[3]. */ c->topo.llc_id = c->topo.apicid >> 3; } From 0dd09e215a391805130338688586805fe8bf2b32 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Mon, 24 Mar 2025 14:33:24 +0100 Subject: [PATCH 049/101] x86/cacheinfo: Apply maintainer-tip coding style fixes The x86/cacheinfo code has been heavily refactored and fleshed out at parent commits, where any necessary coding style fixes were also done in place. Apply Documentation/process/maintainer-tip.rst coding style fixes to the rest of the code, and align its assignment expressions for readability. Standardize on CPUID(n) when mentioning leaf queries. Avoid breaking long lines when doing so helps readability. At cacheinfo_amd_init_llc_id(), rename variable 'msb' to 'index_msb' as this is how it's called at the rest of cacheinfo.c code. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250324133324.23458-30-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 215 ++++++++++++++++---------------- 1 file changed, 107 insertions(+), 108 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 71587570705f..cd48d34ac04b 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -1,11 +1,11 @@ // SPDX-License-Identifier: GPL-2.0 /* - * Routines to identify caches on Intel CPU. + * x86 CPU caches detection and configuration * - * Changes: - * Venkatesh Pallipadi : Adding cache identification through cpuid(4) - * Ashok Raj : Work with CPU hotplug infrastructure. - * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. + * Previous changes + * - Venkatesh Pallipadi: Cache identification through CPUID(4) + * - Ashok Raj : Work with CPU hotplug infrastructure + * - Andi Kleen / Andreas Herrmann: CPUID(4) emulation on AMD */ #include @@ -35,37 +35,37 @@ static cpumask_var_t cpu_cacheinfo_mask; unsigned int memory_caching_control __ro_after_init; enum _cache_type { - CTYPE_NULL = 0, - CTYPE_DATA = 1, - CTYPE_INST = 2, - CTYPE_UNIFIED = 3 + CTYPE_NULL = 0, + CTYPE_DATA = 1, + CTYPE_INST = 2, + CTYPE_UNIFIED = 3 }; union _cpuid4_leaf_eax { struct { - enum _cache_type type:5; - unsigned int level:3; - unsigned int is_self_initializing:1; - unsigned int is_fully_associative:1; - unsigned int reserved:4; - unsigned int num_threads_sharing:12; - unsigned int num_cores_on_die:6; + enum _cache_type type :5; + unsigned int level :3; + unsigned int is_self_initializing :1; + unsigned int is_fully_associative :1; + unsigned int reserved :4; + unsigned int num_threads_sharing :12; + unsigned int num_cores_on_die :6; } split; u32 full; }; union _cpuid4_leaf_ebx { struct { - unsigned int coherency_line_size:12; - unsigned int physical_line_partition:10; - unsigned int ways_of_associativity:10; + unsigned int coherency_line_size :12; + unsigned int physical_line_partition :10; + unsigned int ways_of_associativity :10; } split; u32 full; }; union _cpuid4_leaf_ecx { struct { - unsigned int number_of_sets:32; + unsigned int number_of_sets :32; } split; u32 full; }; @@ -93,60 +93,59 @@ static const enum cache_type cache_type_map[] = { union l1_cache { struct { - unsigned line_size:8; - unsigned lines_per_tag:8; - unsigned assoc:8; - unsigned size_in_kb:8; + unsigned line_size :8; + unsigned lines_per_tag :8; + unsigned assoc :8; + unsigned size_in_kb :8; }; - unsigned val; + unsigned int val; }; union l2_cache { struct { - unsigned line_size:8; - unsigned lines_per_tag:4; - unsigned assoc:4; - unsigned size_in_kb:16; + unsigned line_size :8; + unsigned lines_per_tag :4; + unsigned assoc :4; + unsigned size_in_kb :16; }; - unsigned val; + unsigned int val; }; union l3_cache { struct { - unsigned line_size:8; - unsigned lines_per_tag:4; - unsigned assoc:4; - unsigned res:2; - unsigned size_encoded:14; + unsigned line_size :8; + unsigned lines_per_tag :4; + unsigned assoc :4; + unsigned res :2; + unsigned size_encoded :14; }; - unsigned val; + unsigned int val; }; static const unsigned short assocs[] = { - [1] = 1, - [2] = 2, - [4] = 4, - [6] = 8, - [8] = 16, - [0xa] = 32, - [0xb] = 48, - [0xc] = 64, - [0xd] = 96, - [0xe] = 128, - [0xf] = 0xffff /* fully associative - no way to show this currently */ + [1] = 1, + [2] = 2, + [4] = 4, + [6] = 8, + [8] = 16, + [0xa] = 32, + [0xb] = 48, + [0xc] = 64, + [0xd] = 96, + [0xe] = 128, + [0xf] = 0xffff /* Fully associative */ }; static const unsigned char levels[] = { 1, 1, 2, 3 }; -static const unsigned char types[] = { 1, 2, 3, 3 }; +static const unsigned char types[] = { 1, 2, 3, 3 }; static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, union _cpuid4_leaf_ebx *ebx, union _cpuid4_leaf_ecx *ecx) { unsigned int dummy, line_size, lines_per_tag, assoc, size_in_kb; - union l1_cache l1i, l1d; + union l1_cache l1i, l1d, *l1; union l2_cache l2; union l3_cache l3; - union l1_cache *l1 = &l1d; eax->full = 0; ebx->full = 0; @@ -155,6 +154,7 @@ static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, cpuid(0x80000005, &dummy, &dummy, &l1d.val, &l1i.val); cpuid(0x80000006, &dummy, &dummy, &l2.val, &l3.val); + l1 = &l1d; switch (index) { case 1: l1 = &l1i; @@ -162,48 +162,52 @@ static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, case 0: if (!l1->val) return; - assoc = assocs[l1->assoc]; - line_size = l1->line_size; - lines_per_tag = l1->lines_per_tag; - size_in_kb = l1->size_in_kb; + + assoc = assocs[l1->assoc]; + line_size = l1->line_size; + lines_per_tag = l1->lines_per_tag; + size_in_kb = l1->size_in_kb; break; case 2: if (!l2.val) return; - assoc = assocs[l2.assoc]; - line_size = l2.line_size; - lines_per_tag = l2.lines_per_tag; - /* cpu_data has errata corrections for K7 applied */ - size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); + + /* Use x86_cache_size as it might have K7 errata fixes */ + assoc = assocs[l2.assoc]; + line_size = l2.line_size; + lines_per_tag = l2.lines_per_tag; + size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); break; case 3: if (!l3.val) return; - assoc = assocs[l3.assoc]; - line_size = l3.line_size; - lines_per_tag = l3.lines_per_tag; - size_in_kb = l3.size_encoded * 512; + + assoc = assocs[l3.assoc]; + line_size = l3.line_size; + lines_per_tag = l3.lines_per_tag; + size_in_kb = l3.size_encoded * 512; if (boot_cpu_has(X86_FEATURE_AMD_DCM)) { - size_in_kb = size_in_kb >> 1; - assoc = assoc >> 1; + size_in_kb = size_in_kb >> 1; + assoc = assoc >> 1; } break; default: return; } - eax->split.is_self_initializing = 1; - eax->split.type = types[index]; - eax->split.level = levels[index]; - eax->split.num_threads_sharing = 0; - eax->split.num_cores_on_die = topology_num_cores_per_package(); + eax->split.is_self_initializing = 1; + eax->split.type = types[index]; + eax->split.level = levels[index]; + eax->split.num_threads_sharing = 0; + eax->split.num_cores_on_die = topology_num_cores_per_package(); if (assoc == 0xffff) eax->split.is_fully_associative = 1; - ebx->split.coherency_line_size = line_size - 1; - ebx->split.ways_of_associativity = assoc - 1; - ebx->split.physical_line_partition = lines_per_tag - 1; - ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / + + ebx->split.coherency_line_size = line_size - 1; + ebx->split.ways_of_associativity = assoc - 1; + ebx->split.physical_line_partition = lines_per_tag - 1; + ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / (ebx->split.ways_of_associativity + 1) - 1; } @@ -262,19 +266,14 @@ static int fill_cpuid4_info(int index, struct _cpuid4_info *id4) static int find_num_cache_leaves(struct cpuinfo_x86 *c) { - unsigned int eax, ebx, ecx, edx, op; - union _cpuid4_leaf_eax cache_eax; - int i = -1; - - if (c->x86_vendor == X86_VENDOR_AMD || - c->x86_vendor == X86_VENDOR_HYGON) - op = 0x8000001d; - else - op = 4; + unsigned int eax, ebx, ecx, edx, op; + union _cpuid4_leaf_eax cache_eax; + int i = -1; + /* Do a CPUID(op) loop to calculate num_cache_leaves */ + op = (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) ? 0x8000001d : 4; do { ++i; - /* Do cpuid(op) loop to find out num_cache_leaves */ cpuid_count(op, i, &eax, &ebx, &ecx, &edx); cache_eax.full = eax; } while (cache_eax.split.type != CTYPE_NULL); @@ -312,9 +311,9 @@ void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, u16 die_id) num_sharing_cache = ((eax >> 14) & 0xfff) + 1; if (num_sharing_cache) { - int bits = get_count_order(num_sharing_cache); + int index_msb = get_count_order(num_sharing_cache); - c->topo.llc_id = c->topo.apicid >> bits; + c->topo.llc_id = c->topo.apicid >> index_msb; } } } @@ -335,14 +334,10 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c) { struct cpu_cacheinfo *ci = get_cpu_cacheinfo(c->cpu_index); - if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + if (boot_cpu_has(X86_FEATURE_TOPOEXT)) ci->num_leaves = find_num_cache_leaves(c); - } else if (c->extended_cpuid_level >= 0x80000006) { - if (cpuid_edx(0x80000006) & 0xf000) - ci->num_leaves = 4; - else - ci->num_leaves = 3; - } + else if (c->extended_cpuid_level >= 0x80000006) + ci->num_leaves = (cpuid_edx(0x80000006) & 0xf000) ? 4 : 3; } void init_hygon_cacheinfo(struct cpuinfo_x86 *c) @@ -469,6 +464,9 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) intel_cacheinfo_0x2(c); } +/* + * linux/cacheinfo.h shared_cpu_map setup, AMD/Hygon + */ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, const struct _cpuid4_info *id4) { @@ -485,12 +483,12 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, this_cpu_ci = get_cpu_cacheinfo(i); if (!this_cpu_ci->info_list) continue; + ci = this_cpu_ci->info_list + index; for_each_cpu(sibling, cpu_llc_shared_mask(cpu)) { if (!cpu_online(sibling)) continue; - cpumask_set_cpu(sibling, - &ci->shared_cpu_map); + cpumask_set_cpu(sibling, &ci->shared_cpu_map); } } } else if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { @@ -516,8 +514,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, apicid = cpu_data(sibling).topo.apicid; if ((apicid < first) || (apicid > last)) continue; - cpumask_set_cpu(sibling, - &ci->shared_cpu_map); + cpumask_set_cpu(sibling, &ci->shared_cpu_map); } } } else @@ -526,17 +523,19 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, return 1; } +/* + * linux/cacheinfo.h shared_cpu_map setup, Intel + fallback AMD/Hygon + */ static void __cache_cpumap_setup(unsigned int cpu, int index, const struct _cpuid4_info *id4) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + struct cpuinfo_x86 *c = &cpu_data(cpu); struct cacheinfo *ci, *sibling_ci; unsigned long num_threads_sharing; int index_msb, i; - struct cpuinfo_x86 *c = &cpu_data(cpu); - if (c->x86_vendor == X86_VENDOR_AMD || - c->x86_vendor == X86_VENDOR_HYGON) { + if (c->x86_vendor == X86_VENDOR_AMD || c->x86_vendor == X86_VENDOR_HYGON) { if (__cache_amd_cpumap_setup(cpu, index, id4)) return; } @@ -554,8 +553,10 @@ static void __cache_cpumap_setup(unsigned int cpu, int index, if (cpu_data(i).topo.apicid >> index_msb == c->topo.apicid >> index_msb) { struct cpu_cacheinfo *sib_cpu_ci = get_cpu_cacheinfo(i); + /* Skip if itself or no cacheinfo */ if (i == cpu || !sib_cpu_ci->info_list) - continue;/* skip if itself or no cacheinfo */ + continue; + sibling_ci = sib_cpu_ci->info_list + index; cpumask_set_cpu(i, &ci->shared_cpu_map); cpumask_set_cpu(cpu, &sibling_ci->shared_cpu_map); @@ -589,7 +590,7 @@ int init_cache_level(unsigned int cpu) } /* - * The max shared threads number comes from CPUID.4:EAX[25-14] with input + * The max shared threads number comes from CPUID(4) EAX[25-14] with input * ECX as cache index. Then right shift apicid by the number's order to get * cache id for this cache node. */ @@ -626,8 +627,8 @@ int populate_cache_leaves(unsigned int cpu) ci_info_init(ci++, &id4, nb); __cache_cpumap_setup(cpu, idx, &id4); } - this_cpu_ci->cpu_map_populated = true; + this_cpu_ci->cpu_map_populated = true; return 0; } @@ -659,12 +660,10 @@ void cache_disable(void) __acquires(cache_disable_lock) unsigned long cr0; /* - * Note that this is not ideal - * since the cache is only flushed/disabled for this CPU while the - * MTRRs are changed, but changing this requires more invasive - * changes to the way the kernel boots + * This is not ideal since the cache is only flushed/disabled + * for this CPU while the MTRRs are changed, but changing this + * requires more invasive changes to the way the kernel boots. */ - raw_spin_lock(&cache_disable_lock); /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ From 0405d4b63d082861f4eaff9d39c78ee9dc34f845 Mon Sep 17 00:00:00 2001 From: Edward Adam Davis Date: Fri, 4 Apr 2025 13:31:29 +0800 Subject: [PATCH 050/101] isofs: Prevent the use of too small fid syzbot reported a slab-out-of-bounds Read in isofs_fh_to_parent. [1] The handle_bytes value passed in by the reproducing program is equal to 12. In handle_to_path(), only 12 bytes of memory are allocated for the structure file_handle->f_handle member, which causes an out-of-bounds access when accessing the member parent_block of the structure isofs_fid in isofs, because accessing parent_block requires at least 16 bytes of f_handle. Here, fh_len is used to indirectly confirm that the value of handle_bytes is greater than 3 before accessing parent_block. [1] BUG: KASAN: slab-out-of-bounds in isofs_fh_to_parent+0x1b8/0x210 fs/isofs/export.c:183 Read of size 4 at addr ffff0000cc030d94 by task syz-executor215/6466 CPU: 1 UID: 0 PID: 6466 Comm: syz-executor215 Not tainted 6.14.0-rc7-syzkaller-ga2392f333575 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 02/12/2025 Call trace: show_stack+0x2c/0x3c arch/arm64/kernel/stacktrace.c:466 (C) __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0xe4/0x150 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:408 [inline] print_report+0x198/0x550 mm/kasan/report.c:521 kasan_report+0xd8/0x138 mm/kasan/report.c:634 __asan_report_load4_noabort+0x20/0x2c mm/kasan/report_generic.c:380 isofs_fh_to_parent+0x1b8/0x210 fs/isofs/export.c:183 exportfs_decode_fh_raw+0x2dc/0x608 fs/exportfs/expfs.c:523 do_handle_to_path+0xa0/0x198 fs/fhandle.c:257 handle_to_path fs/fhandle.c:385 [inline] do_handle_open+0x8cc/0xb8c fs/fhandle.c:403 __do_sys_open_by_handle_at fs/fhandle.c:443 [inline] __se_sys_open_by_handle_at fs/fhandle.c:434 [inline] __arm64_sys_open_by_handle_at+0x80/0x94 fs/fhandle.c:434 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:744 el0t_64_sync_handler+0x84/0x108 arch/arm64/kernel/entry-common.c:762 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Allocated by task 6466: kasan_save_stack mm/kasan/common.c:47 [inline] kasan_save_track+0x40/0x78 mm/kasan/common.c:68 kasan_save_alloc_info+0x40/0x50 mm/kasan/generic.c:562 poison_kmalloc_redzone mm/kasan/common.c:377 [inline] __kasan_kmalloc+0xac/0xc4 mm/kasan/common.c:394 kasan_kmalloc include/linux/kasan.h:260 [inline] __do_kmalloc_node mm/slub.c:4294 [inline] __kmalloc_noprof+0x32c/0x54c mm/slub.c:4306 kmalloc_noprof include/linux/slab.h:905 [inline] handle_to_path fs/fhandle.c:357 [inline] do_handle_open+0x5a4/0xb8c fs/fhandle.c:403 __do_sys_open_by_handle_at fs/fhandle.c:443 [inline] __se_sys_open_by_handle_at fs/fhandle.c:434 [inline] __arm64_sys_open_by_handle_at+0x80/0x94 fs/fhandle.c:434 __invoke_syscall arch/arm64/kernel/syscall.c:35 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:49 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:132 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:151 el0_svc+0x54/0x168 arch/arm64/kernel/entry-common.c:744 el0t_64_sync_handler+0x84/0x108 arch/arm64/kernel/entry-common.c:762 el0t_64_sync+0x198/0x19c arch/arm64/kernel/entry.S:600 Reported-by: syzbot+4d7cd7dd0ce1aa8d5c65@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=4d7cd7dd0ce1aa8d5c65 Tested-by: syzbot+4d7cd7dd0ce1aa8d5c65@syzkaller.appspotmail.com CC: stable@vger.kernel.org Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Edward Adam Davis Signed-off-by: Jan Kara Link: https://patch.msgid.link/tencent_9C8CB8A7E7C6C512C7065DC98B6EDF6EC606@qq.com --- fs/isofs/export.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/isofs/export.c b/fs/isofs/export.c index 35768a63fb1d..421d247fae52 100644 --- a/fs/isofs/export.c +++ b/fs/isofs/export.c @@ -180,7 +180,7 @@ static struct dentry *isofs_fh_to_parent(struct super_block *sb, return NULL; return isofs_export_iget(sb, - fh_len > 2 ? ifid->parent_block : 0, + fh_len > 3 ? ifid->parent_block : 0, ifid->parent_offset, fh_len > 4 ? ifid->parent_generation : 0); } From e37aa1211fbfeb9d5e79f4a4b0da898e6d0d53bb Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Tue, 24 Sep 2024 10:01:28 -0700 Subject: [PATCH 051/101] x86/cpuid: Add AMX and SPEC_CTRL dependencies Add some missing dependencies to the CPUID dependency table: - All the AMX features depend on AMX_TILE - All the SPEC_CTRL features depend on SPEC_CTRL [ mingo: Keep the AMX part of the table grouped ... ] Signed-off-by: Andi Kleen Signed-off-by: Ingo Molnar Cc: "H. Peter Anvin" Cc: Sohil Mehta Cc: Tony Luck Cc: Ahmed S. Darwish Link: https://lore.kernel.org/r/20240924170128.2611854-1-ak@linux.intel.com --- arch/x86/kernel/cpu/cpuid-deps.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/x86/kernel/cpu/cpuid-deps.c b/arch/x86/kernel/cpu/cpuid-deps.c index a2fbea0be535..94c062cddfa4 100644 --- a/arch/x86/kernel/cpu/cpuid-deps.c +++ b/arch/x86/kernel/cpu/cpuid-deps.c @@ -82,8 +82,12 @@ static const struct cpuid_dep cpuid_deps[] = { { X86_FEATURE_XFD, X86_FEATURE_XSAVES }, { X86_FEATURE_XFD, X86_FEATURE_XGETBV1 }, { X86_FEATURE_AMX_TILE, X86_FEATURE_XFD }, + { X86_FEATURE_AMX_FP16, X86_FEATURE_AMX_TILE }, + { X86_FEATURE_AMX_BF16, X86_FEATURE_AMX_TILE }, + { X86_FEATURE_AMX_INT8, X86_FEATURE_AMX_TILE }, { X86_FEATURE_SHSTK, X86_FEATURE_XSAVES }, { X86_FEATURE_FRED, X86_FEATURE_LKGS }, + { X86_FEATURE_SPEC_CTRL_SSBD, X86_FEATURE_SPEC_CTRL }, {} }; From eb3a04a8516ee9b5174379306f94279fc90424c4 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 10 Feb 2025 15:11:22 +0100 Subject: [PATCH 052/101] ovl: don't allow datadir only In theory overlayfs could support upper layer directly referring to a data layer, but there's no current use case for this. Originally, when data-only layers were introduced, this wasn't allowed, only introduced by the "datadir+" feature, but without actually handling this case, resulting in an Oops. Fix by disallowing datadir without lowerdir. Reported-by: Giuseppe Scrivano Fixes: 24e16e385f22 ("ovl: add support for appending lowerdirs one by one") Cc: # v6.7 Reviewed-by: Amir Goldstein Reviewed-by: Alexander Larsson Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- fs/overlayfs/super.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index b63474d1b064..e19940d649ca 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -1138,6 +1138,11 @@ static struct ovl_entry *ovl_get_lowerstack(struct super_block *sb, return ERR_PTR(-EINVAL); } + if (ctx->nr == ctx->nr_data) { + pr_err("at least one non-data lowerdir is required\n"); + return ERR_PTR(-EINVAL); + } + err = -EINVAL; for (i = 0; i < ctx->nr; i++) { l = &ctx->lower[i]; From a6eb9a4a69cc360b930dad9dc8513f8fd9b3577f Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Mon, 10 Feb 2025 13:07:55 +0100 Subject: [PATCH 053/101] ovl: remove unused forward declaration The ovl_get_verity_xattr() function was never added, only its declaration. Signed-off-by: Giuseppe Scrivano Fixes: 184996e92e86 ("ovl: Validate verity xattr when resolving lowerdata") Reviewed-by: Amir Goldstein Reviewed-by: Alexander Larsson Reviewed-by: Christian Brauner Signed-off-by: Miklos Szeredi --- fs/overlayfs/overlayfs.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index 6f2f8f4cfbbc..aef942a758ce 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -541,8 +541,6 @@ int ovl_set_metacopy_xattr(struct ovl_fs *ofs, struct dentry *d, bool ovl_is_metacopy_dentry(struct dentry *dentry); char *ovl_get_redirect_xattr(struct ovl_fs *ofs, const struct path *path, int padding); int ovl_ensure_verity_loaded(struct path *path); -int ovl_get_verity_xattr(struct ovl_fs *ofs, const struct path *path, - u8 *digest_buf, int *buf_length); int ovl_validate_verity(struct ovl_fs *ofs, struct path *metapath, struct path *datapath); From 6c44e5354d4d16d9d891a419ca3f57abfe18ce7a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Tue, 1 Apr 2025 20:49:00 +0000 Subject: [PATCH 054/101] RAS/AMD/ATL: Include row[13] bit in row retirement Based on feedback from hardware folks, row[13] is part of the variable bits within a physical row (along with all column bits). Only half the physical addresses affected by a row are calculated if this bit is not included. Add the row[13] bit to the row retirement flow. Fixes: 3b566b30b414 ("RAS/AMD/ATL: Add MI300 row retirement support") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org Link: https://lore.kernel.org/r/20250401-fix-fmpm-extra-records-v1-1-840bcf7a8ac5@amd.com --- drivers/ras/amd/atl/umc.c | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index dc8aa12f63c8..cb8ace3d4e42 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -320,7 +320,7 @@ static unsigned long convert_dram_to_norm_addr_mi300(unsigned long addr) * See amd_atl::convert_dram_to_norm_addr_mi300() for MI300 address formats. */ #define MI300_NUM_COL BIT(HWEIGHT(MI300_UMC_MCA_COL)) -static void retire_row_mi300(struct atl_err *a_err) +static void _retire_row_mi300(struct atl_err *a_err) { unsigned long addr; struct page *p; @@ -351,6 +351,23 @@ static void retire_row_mi300(struct atl_err *a_err) } } +/* + * In addition to the column bits, the row[13] bit should also be included when + * calculating addresses affected by a physical row. + * + * Instead of running through another loop over a single bit, just run through + * the column bits twice and flip the row[13] bit in-between. + * + * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value. + */ +#define MI300_UMC_MCA_ROW13 BIT(23) +static void retire_row_mi300(struct atl_err *a_err) +{ + _retire_row_mi300(a_err); + a_err->addr ^= MI300_UMC_MCA_ROW13; + _retire_row_mi300(a_err); +} + void amd_retire_dram_row(struct atl_err *a_err) { if (df_cfg.rev == DF4p5 && df_cfg.flags.heterogeneous) From cfa5f336bdbde49cf0102ab55007b34361988fd1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 2 Apr 2025 13:17:54 +0100 Subject: [PATCH 055/101] devpts: Fix type for uid and gid params Fix devpts to parse uid and gid params using the correct type so that they get interpreted in the context of the user namespace. Fixes: cc0876f817d6 ("vfs: Convert devpts to use the new mount API") Reported-by: Debarshi Ray Closes: https://github.com/containers/podman/issues/25751 Signed-off-by: Giuseppe Scrivano Signed-off-by: David Howells Link: https://lore.kernel.org/r/759134.1743596274@warthog.procyon.org.uk cc: Eric Sandeen cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/devpts/inode.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c index 42e4d6eeb29f..9c20d78e41f6 100644 --- a/fs/devpts/inode.c +++ b/fs/devpts/inode.c @@ -89,12 +89,12 @@ enum { }; static const struct fs_parameter_spec devpts_param_specs[] = { - fsparam_u32 ("gid", Opt_gid), + fsparam_gid ("gid", Opt_gid), fsparam_s32 ("max", Opt_max), fsparam_u32oct ("mode", Opt_mode), fsparam_flag ("newinstance", Opt_newinstance), fsparam_u32oct ("ptmxmode", Opt_ptmxmode), - fsparam_u32 ("uid", Opt_uid), + fsparam_uid ("uid", Opt_uid), {} }; From a94fd938df2b1628da66b498aa0eeb89593bc7a2 Mon Sep 17 00:00:00 2001 From: Xiangsheng Hou Date: Mon, 7 Apr 2025 19:50:49 +0800 Subject: [PATCH 056/101] virtiofs: add filesystem context source name check In certain scenarios, for example, during fuzz testing, the source name may be NULL, which could lead to a kernel panic. Therefore, an extra check for the source name should be added. Fixes: a62a8ef9d97d ("virtio-fs: add virtiofs filesystem") Cc: # all LTS kernels Signed-off-by: Xiangsheng Hou Link: https://lore.kernel.org/20250407115111.25535-1-xiangsheng.hou@mediatek.com Signed-off-by: Christian Brauner --- fs/fuse/virtio_fs.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c index 2c7b24cb67ad..53c2626e90e7 100644 --- a/fs/fuse/virtio_fs.c +++ b/fs/fuse/virtio_fs.c @@ -1669,6 +1669,9 @@ static int virtio_fs_get_tree(struct fs_context *fsc) unsigned int virtqueue_size; int err = -EIO; + if (!fsc->source) + return invalf(fsc, "No source specified"); + /* This gets a reference on virtio_fs object. This ptr gets installed * in fc->iq->priv. Once fuse_conn is going away, it calls ->put() * to drop the reference to this object. From bb5e07cb927724e0b47be371fa081141cfb14414 Mon Sep 17 00:00:00 2001 From: Vasiliy Kovalev Date: Sat, 19 Oct 2024 22:13:03 +0300 Subject: [PATCH 057/101] hfs/hfsplus: fix slab-out-of-bounds in hfs_bnode_read_key Syzbot reported an issue in hfs subsystem: BUG: KASAN: slab-out-of-bounds in memcpy_from_page include/linux/highmem.h:423 [inline] BUG: KASAN: slab-out-of-bounds in hfs_bnode_read fs/hfs/bnode.c:35 [inline] BUG: KASAN: slab-out-of-bounds in hfs_bnode_read_key+0x314/0x450 fs/hfs/bnode.c:70 Write of size 94 at addr ffff8880123cd100 by task syz-executor237/5102 Call Trace: __dump_stack lib/dump_stack.c:94 [inline] dump_stack_lvl+0x241/0x360 lib/dump_stack.c:120 print_address_description mm/kasan/report.c:377 [inline] print_report+0x169/0x550 mm/kasan/report.c:488 kasan_report+0x143/0x180 mm/kasan/report.c:601 kasan_check_range+0x282/0x290 mm/kasan/generic.c:189 __asan_memcpy+0x40/0x70 mm/kasan/shadow.c:106 memcpy_from_page include/linux/highmem.h:423 [inline] hfs_bnode_read fs/hfs/bnode.c:35 [inline] hfs_bnode_read_key+0x314/0x450 fs/hfs/bnode.c:70 hfs_brec_insert+0x7f3/0xbd0 fs/hfs/brec.c:159 hfs_cat_create+0x41d/0xa50 fs/hfs/catalog.c:118 hfs_mkdir+0x6c/0xe0 fs/hfs/dir.c:232 vfs_mkdir+0x2f9/0x4f0 fs/namei.c:4257 do_mkdirat+0x264/0x3a0 fs/namei.c:4280 __do_sys_mkdir fs/namei.c:4300 [inline] __se_sys_mkdir fs/namei.c:4298 [inline] __x64_sys_mkdir+0x6c/0x80 fs/namei.c:4298 do_syscall_x64 arch/x86/entry/common.c:52 [inline] do_syscall_64+0xf3/0x230 arch/x86/entry/common.c:83 entry_SYSCALL_64_after_hwframe+0x77/0x7f RIP: 0033:0x7fbdd6057a99 Add a check for key length in hfs_bnode_read_key to prevent out-of-bounds memory access. If the key length is invalid, the key buffer is cleared, improving stability and reliability. Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Reported-by: syzbot+5f3a973ed3dfb85a6683@syzkaller.appspotmail.com Closes: https://syzkaller.appspot.com/bug?extid=5f3a973ed3dfb85a6683 Cc: stable@vger.kernel.org Signed-off-by: Vasiliy Kovalev Link: https://lore.kernel.org/20241019191303.24048-1-kovalev@altlinux.org Reviewed-by: Cengiz Can Signed-off-by: Christian Brauner --- fs/hfs/bnode.c | 6 ++++++ fs/hfsplus/bnode.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c index 6add6ebfef89..cb823a8a6ba9 100644 --- a/fs/hfs/bnode.c +++ b/fs/hfs/bnode.c @@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) else key_len = tree->max_key_len + 1; + if (key_len > sizeof(hfs_btree_key) || key_len < 1) { + memset(key, 0, sizeof(hfs_btree_key)); + pr_err("hfs: Invalid key length: %d\n", key_len); + return; + } + hfs_bnode_read(node, key, off, key_len); } diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c index 87974d5e6791..079ea80534f7 100644 --- a/fs/hfsplus/bnode.c +++ b/fs/hfsplus/bnode.c @@ -67,6 +67,12 @@ void hfs_bnode_read_key(struct hfs_bnode *node, void *key, int off) else key_len = tree->max_key_len + 2; + if (key_len > sizeof(hfsplus_btree_key) || key_len < 1) { + memset(key, 0, sizeof(hfsplus_btree_key)); + pr_err("hfsplus: Invalid key length: %d\n", key_len); + return; + } + hfs_bnode_read(node, key, off, key_len); } From 6b395d31146a3fae775823ea8570a37b922f6685 Mon Sep 17 00:00:00 2001 From: Kashyap Desai Date: Mon, 24 Mar 2025 09:39:35 +0530 Subject: [PATCH 058/101] RDMA/bnxt_re: Fix budget handling of notification queue The cited commit in Fixes tag introduced a bug which can cause hang of completion queue processing because of notification queue budget goes to zero. Found while doing nfs over rdma mount and umount. Below message is noticed because of the existing bug. kernel: cm_destroy_id_wait_timeout: cm_id=00000000ff6c6cc6 timed out. state 11 -> 0, refcnt=1 Fix to handle this issue - Driver will not change nq->budget upon create and destroy of cq and srq rdma resources. Fixes: cb97b377a135 ("RDMA/bnxt_re: Refurbish CQ to NQ hash calculation") Link: https://patch.msgid.link/r/20250324040935.90182-1-kalesh-anakkur.purayil@broadcom.com Signed-off-by: Kashyap Desai Signed-off-by: Kalesh AP Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index 9082b3fd2b47..e14b05cd089a 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1785,8 +1785,6 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) bnxt_qplib_destroy_srq(&rdev->qplib_res, qplib_srq); ib_umem_release(srq->umem); atomic_dec(&rdev->stats.res.srq_count); - if (nq) - nq->budget--; return 0; } @@ -1908,8 +1906,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, goto fail; } } - if (nq) - nq->budget++; active_srqs = atomic_inc_return(&rdev->stats.res.srq_count); if (active_srqs > rdev->stats.res.srq_watermark) rdev->stats.res.srq_watermark = active_srqs; @@ -3079,7 +3075,6 @@ int bnxt_re_destroy_cq(struct ib_cq *ib_cq, struct ib_udata *udata) ib_umem_release(cq->umem); atomic_dec(&rdev->stats.res.cq_count); - nq->budget--; kfree(cq->cql); return 0; } From 62dd71e691109bb44ba8ad7f58b3a2ac6b69d496 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 14 Mar 2025 16:57:15 +0100 Subject: [PATCH 059/101] RDMA/ucaps: Avoid format-security warning Passing a non-literal format string to dev_set_name causes a warning: drivers/infiniband/core/ucaps.c:173:33: error: format string is not a string literal (potentially insecure) [-Werror,-Wformat-security] 173 | ret = dev_set_name(&ucap->dev, ucap_names[type]); | ^~~~~~~~~~~~~~~~ drivers/infiniband/core/ucaps.c:173:33: note: treat the string as an argument to avoid this 173 | ret = dev_set_name(&ucap->dev, ucap_names[type]); | ^ | "%s", Turn the name into the %s argument as suggested by gcc. Fixes: 61e51682816d ("RDMA/uverbs: Introduce UCAP (User CAPabilities) API") Link: https://patch.msgid.link/r/20250314155721.264083-1-arnd@kernel.org Signed-off-by: Arnd Bergmann Reviewed-by: Zhu Yanjun Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/core/ucaps.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/core/ucaps.c b/drivers/infiniband/core/ucaps.c index 6853c6d078f9..de5cb8bf0a61 100644 --- a/drivers/infiniband/core/ucaps.c +++ b/drivers/infiniband/core/ucaps.c @@ -170,7 +170,7 @@ int ib_create_ucap(enum rdma_user_cap type) ucap->dev.class = &ucaps_class; ucap->dev.devt = MKDEV(MAJOR(ucaps_base_dev), type); ucap->dev.release = ucap_dev_release; - ret = dev_set_name(&ucap->dev, ucap_names[type]); + ret = dev_set_name(&ucap->dev, "%s", ucap_names[type]); if (ret) goto err_device; From 95ba3850fed03e01b422ab5d7943aeba130c9723 Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Mon, 24 Mar 2025 20:31:32 +0800 Subject: [PATCH 060/101] RDMA/usnic: Fix passing zero to PTR_ERR in usnic_ib_pci_probe() drivers/infiniband/hw/usnic/usnic_ib_main.c:590 usnic_ib_pci_probe() warn: passing zero to 'PTR_ERR' Make usnic_ib_device_add() return NULL on fail path, also remove useless NULL check for usnic_ib_discover_pf() Fixes: e3cf00d0a87f ("IB/usnic: Add Cisco VIC low-level hardware driver") Link: https://patch.msgid.link/r/20250324123132.2392077-1-yuehaibing@huawei.com Signed-off-by: Yue Haibing Reviewed-by: Zhu Yanjun Reviewed-by: Jason Gunthorpe Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/usnic/usnic_ib_main.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/hw/usnic/usnic_ib_main.c b/drivers/infiniband/hw/usnic/usnic_ib_main.c index 4ddcd5860e0f..11eca39b73a9 100644 --- a/drivers/infiniband/hw/usnic/usnic_ib_main.c +++ b/drivers/infiniband/hw/usnic/usnic_ib_main.c @@ -397,7 +397,7 @@ static void *usnic_ib_device_add(struct pci_dev *dev) if (!us_ibdev) { usnic_err("Device %s context alloc failed\n", netdev_name(pci_get_drvdata(dev))); - return ERR_PTR(-EFAULT); + return NULL; } us_ibdev->ufdev = usnic_fwd_dev_alloc(dev); @@ -517,8 +517,8 @@ static struct usnic_ib_dev *usnic_ib_discover_pf(struct usnic_vnic *vnic) } us_ibdev = usnic_ib_device_add(parent_pci); - if (IS_ERR_OR_NULL(us_ibdev)) { - us_ibdev = us_ibdev ? us_ibdev : ERR_PTR(-EFAULT); + if (!us_ibdev) { + us_ibdev = ERR_PTR(-EFAULT); goto out; } @@ -586,10 +586,10 @@ static int usnic_ib_pci_probe(struct pci_dev *pdev, } pf = usnic_ib_discover_pf(vf->vnic); - if (IS_ERR_OR_NULL(pf)) { - usnic_err("Failed to discover pf of vnic %s with err%ld\n", - pci_name(pdev), PTR_ERR(pf)); - err = pf ? PTR_ERR(pf) : -EFAULT; + if (IS_ERR(pf)) { + err = PTR_ERR(pf); + usnic_err("Failed to discover pf of vnic %s with err%d\n", + pci_name(pdev), err); goto out_clean_vnic; } From d247667ecd6411ec5bec9a38db7feaa599ce3ee2 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Wed, 2 Apr 2025 10:09:44 +0300 Subject: [PATCH 061/101] RDMA/mlx5: Fix compilation warning when USER_ACCESS isn't set The cited commit made fs.c always compile, even when INFINIBAND_USER_ACCESS isn't set. This results in a compilation warning about an unused object when compiling with W=1 and USER_ACCESS is unset. Fix this by defining uverbs_destroy_def_handler() even when USER_ACCESS isn't set. Fixes: 36e0d433672f ("RDMA/mlx5: Compile fs.c regardless of INFINIBAND_USER_ACCESS config") Link: https://patch.msgid.link/r/20250402070944.1022093-1-mbloch@nvidia.com Signed-off-by: Mark Bloch Tested-by: Arnd Bergmann Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/fs.c | 2 -- include/rdma/ib_verbs.h | 7 +++++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/fs.c b/drivers/infiniband/hw/mlx5/fs.c index 251246c73b33..0ff9f18a71e8 100644 --- a/drivers/infiniband/hw/mlx5/fs.c +++ b/drivers/infiniband/hw/mlx5/fs.c @@ -3461,7 +3461,6 @@ DECLARE_UVERBS_NAMED_OBJECT( &UVERBS_METHOD(MLX5_IB_METHOD_STEERING_ANCHOR_DESTROY)); const struct uapi_definition mlx5_ib_flow_defs[] = { -#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_FLOW_MATCHER), UAPI_DEF_CHAIN_OBJ_TREE( @@ -3472,7 +3471,6 @@ const struct uapi_definition mlx5_ib_flow_defs[] = { UAPI_DEF_CHAIN_OBJ_TREE_NAMED( MLX5_IB_OBJECT_STEERING_ANCHOR, UAPI_DEF_IS_OBJ_SUPPORTED(mlx5_ib_shared_ft_allowed)), -#endif {}, }; diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h index d42eae69d9a8..901353796fbb 100644 --- a/include/rdma/ib_verbs.h +++ b/include/rdma/ib_verbs.h @@ -4790,7 +4790,14 @@ void roce_del_all_netdev_gids(struct ib_device *ib_dev, struct ib_ucontext *ib_uverbs_get_ucontext_file(struct ib_uverbs_file *ufile); +#if IS_ENABLED(CONFIG_INFINIBAND_USER_ACCESS) int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs); +#else +static inline int uverbs_destroy_def_handler(struct uverbs_attr_bundle *attrs) +{ + return 0; +} +#endif struct net_device *rdma_alloc_netdev(struct ib_device *device, u32 port_num, enum rdma_netdev_t type, const char *name, From 1b2fe85f3cf19026a0e9037242bcbf7e736b22e3 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Wed, 2 Apr 2025 11:26:57 +0800 Subject: [PATCH 062/101] RDMA/rxe: Fix null pointer dereference in ODP MR check The blktests/rnbd reported a null pointer dereference as following. Similar to the mlx5, introduce a is_odp_mr() to check if the odp is enabled in this mr. Workqueue: rxe_wq do_work [rdma_rxe] RIP: 0010:rxe_mr_copy+0x57/0x210 [rdma_rxe] Code: 7c 04 48 89 f3 48 89 d5 41 89 cf 45 89 c4 0f 84 dc 00 00 00 89 ca e8 f8 f8 ff ff 85 c0 0f 85 75 01 00 00 49 8b 86 f0 00 00 00 40 28 02 0f 85 98 01 00 00 41 8b 46 78 41 8b 8e 10 01 00 00 8d RSP: 0018:ffffa0aac02cfcf8 EFLAGS: 00010246 RAX: 0000000000000000 RBX: ffff9079cd440024 RCX: 0000000000000000 RDX: 000000000000003c RSI: ffff9079cd440060 RDI: ffff9079cd665600 RBP: ffff9079c0e5e45a R08: 0000000000000000 R09: 0000000000000000 R10: 000000003c000000 R11: 0000000000225510 R12: 0000000000000000 R13: 0000000000000000 R14: ffff9079cd665600 R15: 000000000000003c FS: 0000000000000000(0000) GS:ffff907ccfa80000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 0000000119498001 CR4: 00000000001726f0 Call Trace: ? __die_body+0x1e/0x60 ? page_fault_oops+0x14f/0x4c0 ? rxe_mr_copy+0x57/0x210 [rdma_rxe] ? search_bpf_extables+0x5f/0x80 ? exc_page_fault+0x7e/0x180 ? asm_exc_page_fault+0x26/0x30 ? rxe_mr_copy+0x57/0x210 [rdma_rxe] ? rxe_mr_copy+0x48/0x210 [rdma_rxe] ? rxe_pool_get_index+0x50/0x90 [rdma_rxe] rxe_receiver+0x1d98/0x2530 [rdma_rxe] ? psi_task_switch+0x1ff/0x250 ? finish_task_switch+0x92/0x2d0 ? __schedule+0xbdf/0x17c0 do_task+0x65/0x1e0 [rdma_rxe] process_scheduled_works+0xaa/0x3f0 worker_thread+0x117/0x240 Fixes: d03fb5c6599e ("RDMA/rxe: Allow registering MRs for On-Demand Paging") Cc: stable@vger.kernel.org Link: https://patch.msgid.link/r/20250402032657.1762800-1-lizhijian@fujitsu.com Signed-off-by: Li Zhijian Reviewed-by: Daisuke Matsuda Reviewed-by: Zhu Yanjun Signed-off-by: Jason Gunthorpe --- drivers/infiniband/sw/rxe/rxe_loc.h | 6 ++++++ drivers/infiniband/sw/rxe/rxe_mr.c | 4 ++-- drivers/infiniband/sw/rxe/rxe_resp.c | 4 ++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/infiniband/sw/rxe/rxe_loc.h b/drivers/infiniband/sw/rxe/rxe_loc.h index feb386d98d1d..0bc3fbb6554f 100644 --- a/drivers/infiniband/sw/rxe/rxe_loc.h +++ b/drivers/infiniband/sw/rxe/rxe_loc.h @@ -140,6 +140,12 @@ static inline int qp_mtu(struct rxe_qp *qp) return IB_MTU_4096; } +static inline bool is_odp_mr(struct rxe_mr *mr) +{ + return IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && mr->umem && + mr->umem->is_odp; +} + void free_rd_atomic_resource(struct resp_res *res); static inline void rxe_advance_resp_resource(struct rxe_qp *qp) diff --git a/drivers/infiniband/sw/rxe/rxe_mr.c b/drivers/infiniband/sw/rxe/rxe_mr.c index 868d2f0b74e9..432d864c3ce9 100644 --- a/drivers/infiniband/sw/rxe/rxe_mr.c +++ b/drivers/infiniband/sw/rxe/rxe_mr.c @@ -323,7 +323,7 @@ int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, return err; } - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return rxe_odp_mr_copy(mr, iova, addr, length, dir); else return rxe_mr_copy_xarray(mr, iova, addr, length, dir); @@ -536,7 +536,7 @@ int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value) u64 *va; /* ODP is not supported right now. WIP. */ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* See IBA oA19-28 */ diff --git a/drivers/infiniband/sw/rxe/rxe_resp.c b/drivers/infiniband/sw/rxe/rxe_resp.c index 54ba9ee1acc5..5d9174e408db 100644 --- a/drivers/infiniband/sw/rxe/rxe_resp.c +++ b/drivers/infiniband/sw/rxe/rxe_resp.c @@ -650,7 +650,7 @@ static enum resp_states process_flush(struct rxe_qp *qp, struct resp_res *res = qp->resp.res; /* ODP is not supported right now. WIP. */ - if (mr->umem->is_odp) + if (is_odp_mr(mr)) return RESPST_ERR_UNSUPPORTED_OPCODE; /* oA19-14, oA19-15 */ @@ -706,7 +706,7 @@ static enum resp_states atomic_reply(struct rxe_qp *qp, if (!res->replay) { u64 iova = qp->resp.va + qp->resp.offset; - if (mr->umem->is_odp) + if (is_odp_mr(mr)) err = rxe_odp_atomic_op(mr, iova, pkt->opcode, atmeth_comp(pkt), atmeth_swap_add(pkt), From 9beb2c91fb86e0be70a5833c6730441fa3c9efa8 Mon Sep 17 00:00:00 2001 From: Chengchang Tang Date: Thu, 27 Mar 2025 19:47:24 +0800 Subject: [PATCH 063/101] RDMA/hns: Fix wrong maximum DMA segment size Set maximum DMA segment size to 2G instead of UINT_MAX due to HW limit. Fixes: e0477b34d9d1 ("RDMA: Explicitly pass in the dma_device to ib_register_device") Link: https://patch.msgid.link/r/20250327114724.3454268-3-huangjunxian6@hisilicon.com Signed-off-by: Chengchang Tang Signed-off-by: Junxian Huang Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/hns/hns_roce_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/hns/hns_roce_main.c b/drivers/infiniband/hw/hns/hns_roce_main.c index cf89a8db4f64..8d0b63d4b50a 100644 --- a/drivers/infiniband/hw/hns/hns_roce_main.c +++ b/drivers/infiniband/hw/hns/hns_roce_main.c @@ -763,7 +763,7 @@ static int hns_roce_register_device(struct hns_roce_dev *hr_dev) if (ret) return ret; } - dma_set_max_seg_size(dev, UINT_MAX); + dma_set_max_seg_size(dev, SZ_2G); ret = ib_register_device(ib_dev, "hns_%d", dev); if (ret) { dev_err(dev, "ib_register_device failed!\n"); From 58029c39cdc54ac4f4dc40b4a9c05eed9f9b808a Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 27 Feb 2025 19:31:32 +0000 Subject: [PATCH 064/101] RAS/AMD/FMPM: Get masked address Some operations require checking, or ignoring, specific bits in an address value. For example, this can be comparing address values to identify unique structures. Currently, the full address value is compared when filtering for duplicates. This results in over counting and creation of extra records. This gives the impression that more unique events occurred than did in reality. Mask the address for physical rows on MI300. [ bp: Simplify. ] Fixes: 6f15e617cc99 ("RAS: Introduce a FRU memory poison manager") Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov (AMD) Cc: stable@vger.kernel.org --- drivers/ras/amd/atl/internal.h | 3 +++ drivers/ras/amd/atl/umc.c | 2 -- drivers/ras/amd/fmpm.c | 9 ++++++++- 3 files changed, 11 insertions(+), 3 deletions(-) diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index f9be26d25348..d096b58cd0ae 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -362,4 +362,7 @@ static inline void atl_debug_on_bad_intlv_mode(struct addr_ctx *ctx) atl_debug(ctx, "Unrecognized interleave mode: %u", ctx->map.intlv_mode); } +#define MI300_UMC_MCA_COL GENMASK(5, 1) +#define MI300_UMC_MCA_ROW13 BIT(23) + #endif /* __AMD_ATL_INTERNAL_H__ */ diff --git a/drivers/ras/amd/atl/umc.c b/drivers/ras/amd/atl/umc.c index cb8ace3d4e42..6e072b7667e9 100644 --- a/drivers/ras/amd/atl/umc.c +++ b/drivers/ras/amd/atl/umc.c @@ -229,7 +229,6 @@ int get_umc_info_mi300(void) * Additionally, the PC and Bank bits may be hashed. This must be accounted for before * reconstructing the normalized address. */ -#define MI300_UMC_MCA_COL GENMASK(5, 1) #define MI300_UMC_MCA_BANK GENMASK(9, 6) #define MI300_UMC_MCA_ROW GENMASK(24, 10) #define MI300_UMC_MCA_PC BIT(25) @@ -360,7 +359,6 @@ static void _retire_row_mi300(struct atl_err *a_err) * * See MI300_UMC_MCA_ROW for the row bits in MCA_ADDR_UMC value. */ -#define MI300_UMC_MCA_ROW13 BIT(23) static void retire_row_mi300(struct atl_err *a_err) { _retire_row_mi300(a_err); diff --git a/drivers/ras/amd/fmpm.c b/drivers/ras/amd/fmpm.c index 90de737fbc90..8877c6ff64c4 100644 --- a/drivers/ras/amd/fmpm.c +++ b/drivers/ras/amd/fmpm.c @@ -250,6 +250,13 @@ static bool rec_has_valid_entries(struct fru_rec *rec) return true; } +/* + * Row retirement is done on MI300 systems, and some bits are 'don't + * care' for comparing addresses with unique physical rows. This + * includes all column bits and the row[13] bit. + */ +#define MASK_ADDR(addr) ((addr) & ~(MI300_UMC_MCA_ROW13 | MI300_UMC_MCA_COL)) + static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_desc *new) { /* @@ -258,7 +265,7 @@ static bool fpds_equal(struct cper_fru_poison_desc *old, struct cper_fru_poison_ * * Also, order the checks from most->least likely to fail to shortcut the code. */ - if (old->addr != new->addr) + if (MASK_ADDR(old->addr) != MASK_ADDR(new->addr)) return false; if (old->hw_id != new->hw_id) From 45f5dcdd049719fb999393b30679605f16ebce14 Mon Sep 17 00:00:00 2001 From: Sharath Srinivasan Date: Wed, 26 Mar 2025 14:05:32 -0700 Subject: [PATCH 065/101] RDMA/cma: Fix workqueue crash in cma_netevent_work_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct rdma_cm_id has member "struct work_struct net_work" that is reused for enqueuing cma_netevent_work_handler()s onto cma_wq. Below crash[1] can occur if more than one call to cma_netevent_callback() occurs in quick succession, which further enqueues cma_netevent_work_handler()s for the same rdma_cm_id, overwriting any previously queued work-item(s) that was just scheduled to run i.e. there is no guarantee the queued work item may run between two successive calls to cma_netevent_callback() and the 2nd INIT_WORK would overwrite the 1st work item (for the same rdma_cm_id), despite grabbing id_table_lock during enqueue. Also drgn analysis [2] indicates the work item was likely overwritten. Fix this by moving the INIT_WORK() to __rdma_create_id(), so that it doesn't race with any existing queue_work() or its worker thread. [1] Trimmed crash stack: ============================================= BUG: kernel NULL pointer dereference, address: 0000000000000008 kworker/u256:6 ... 6.12.0-0... Workqueue: cma_netevent_work_handler [rdma_cm] (rdma_cm) RIP: 0010:process_one_work+0xba/0x31a Call Trace: worker_thread+0x266/0x3a0 kthread+0xcf/0x100 ret_from_fork+0x31/0x50 ret_from_fork_asm+0x1a/0x30 ============================================= [2] drgn crash analysis: >>> trace = prog.crashed_thread().stack_trace() >>> trace (0) crash_setup_regs (./arch/x86/include/asm/kexec.h:111:15) (1) __crash_kexec (kernel/crash_core.c:122:4) (2) panic (kernel/panic.c:399:3) (3) oops_end (arch/x86/kernel/dumpstack.c:382:3) ... (8) process_one_work (kernel/workqueue.c:3168:2) (9) process_scheduled_works (kernel/workqueue.c:3310:3) (10) worker_thread (kernel/workqueue.c:3391:4) (11) kthread (kernel/kthread.c:389:9) Line workqueue.c:3168 for this kernel version is in process_one_work(): 3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); >>> trace[8]["work"] *(struct work_struct *)0xffff92577d0a21d8 = { .data = (atomic_long_t){ .counter = (s64)536870912, <=== Note }, .entry = (struct list_head){ .next = (struct list_head *)0xffff924d075924c0, .prev = (struct list_head *)0xffff924d075924c0, }, .func = (work_func_t)cma_netevent_work_handler+0x0 = 0xffffffffc2cec280, } Suspicion is that pwq is NULL: >>> trace[8]["pwq"] (struct pool_workqueue *) In process_one_work(), pwq is assigned from: struct pool_workqueue *pwq = get_work_pwq(work); and get_work_pwq() is: static struct pool_workqueue *get_work_pwq(struct work_struct *work) { unsigned long data = atomic_long_read(&work->data); if (data & WORK_STRUCT_PWQ) return work_struct_pwq(data); else return NULL; } WORK_STRUCT_PWQ is 0x4: >>> print(repr(prog['WORK_STRUCT_PWQ'])) Object(prog, 'enum work_flags', value=4) But work->data is 536870912 which is 0x20000000. So, get_work_pwq() returns NULL and we crash in process_one_work(): 3168 strscpy(worker->desc, pwq->wq->name, WORKER_DESC_LEN); ============================================= Fixes: 925d046e7e52 ("RDMA/core: Add a netevent notifier to cma") Cc: stable@vger.kernel.org Co-developed-by: HÃ¥kon Bugge Signed-off-by: HÃ¥kon Bugge Signed-off-by: Sharath Srinivasan Reviewed-by: Patrisious Haddad Link: https://patch.msgid.link/bf0082f9-5b25-4593-92c6-d130aa8ba439@oracle.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/cma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/infiniband/core/cma.c b/drivers/infiniband/core/cma.c index fedcdb56fb6b..ab31eefa916b 100644 --- a/drivers/infiniband/core/cma.c +++ b/drivers/infiniband/core/cma.c @@ -72,6 +72,8 @@ static const char * const cma_events[] = { static void cma_iboe_set_mgid(struct sockaddr *addr, union ib_gid *mgid, enum ib_gid_type gid_type); +static void cma_netevent_work_handler(struct work_struct *_work); + const char *__attribute_const__ rdma_event_msg(enum rdma_cm_event_type event) { size_t index = event; @@ -1047,6 +1049,7 @@ __rdma_create_id(struct net *net, rdma_cm_event_handler event_handler, get_random_bytes(&id_priv->seq_num, sizeof id_priv->seq_num); id_priv->id.route.addr.dev_addr.net = get_net(net); id_priv->seq_num &= 0x00ffffff; + INIT_WORK(&id_priv->id.net_work, cma_netevent_work_handler); rdma_restrack_new(&id_priv->res, RDMA_RESTRACK_CM_ID); if (parent) @@ -5241,7 +5244,6 @@ static int cma_netevent_callback(struct notifier_block *self, if (!memcmp(current_id->id.route.addr.dev_addr.dst_dev_addr, neigh->ha, ETH_ALEN)) continue; - INIT_WORK(¤t_id->id.net_work, cma_netevent_work_handler); cma_id_get(current_id); queue_work(cma_wq, ¤t_id->id.net_work); } From 9a0e6f15029e1a8a21e40f06fd05aa52b7f063de Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Wed, 19 Mar 2025 14:42:21 +0200 Subject: [PATCH 066/101] RDMA/core: Silence oversized kvmalloc() warning syzkaller triggered an oversized kvmalloc() warning. Silence it by adding __GFP_NOWARN. syzkaller log: WARNING: CPU: 7 PID: 518 at mm/util.c:665 __kvmalloc_node_noprof+0x175/0x180 CPU: 7 UID: 0 PID: 518 Comm: c_repro Not tainted 6.11.0-rc6+ #6 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:__kvmalloc_node_noprof+0x175/0x180 RSP: 0018:ffffc90001e67c10 EFLAGS: 00010246 RAX: 0000000000000100 RBX: 0000000000000400 RCX: ffffffff8149d46b RDX: 0000000000000000 RSI: ffff8881030fae80 RDI: 0000000000000002 RBP: 000000712c800000 R08: 0000000000000100 R09: 0000000000000000 R10: ffffc90001e67c10 R11: 0030ae0601000000 R12: 0000000000000000 R13: 0000000000000000 R14: 00000000ffffffff R15: 0000000000000000 FS: 00007fde79159740(0000) GS:ffff88813bdc0000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000020000180 CR3: 0000000105eb4005 CR4: 00000000003706b0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ib_umem_odp_get+0x1f6/0x390 mlx5_ib_reg_user_mr+0x1e8/0x450 ib_uverbs_reg_mr+0x28b/0x440 ib_uverbs_write+0x7d3/0xa30 vfs_write+0x1ac/0x6c0 ksys_write+0x134/0x170 ? __sanitizer_cov_trace_pc+0x1c/0x50 do_syscall_64+0x50/0x110 entry_SYSCALL_64_after_hwframe+0x76/0x7e Fixes: 37824952dc8f ("RDMA/odp: Use kvcalloc for the dma_list and page_list") Signed-off-by: Shay Drory Link: https://patch.msgid.link/c6cb92379de668be94894f49c2cfa40e73f94d56.1742388096.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/core/umem_odp.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/core/umem_odp.c b/drivers/infiniband/core/umem_odp.c index e9fa22d31c23..c48ef6083020 100644 --- a/drivers/infiniband/core/umem_odp.c +++ b/drivers/infiniband/core/umem_odp.c @@ -76,12 +76,14 @@ static inline int ib_init_umem_odp(struct ib_umem_odp *umem_odp, npfns = (end - start) >> PAGE_SHIFT; umem_odp->pfn_list = kvcalloc( - npfns, sizeof(*umem_odp->pfn_list), GFP_KERNEL); + npfns, sizeof(*umem_odp->pfn_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->pfn_list) return -ENOMEM; umem_odp->dma_list = kvcalloc( - ndmas, sizeof(*umem_odp->dma_list), GFP_KERNEL); + ndmas, sizeof(*umem_odp->dma_list), + GFP_KERNEL | __GFP_NOWARN); if (!umem_odp->dma_list) { ret = -ENOMEM; goto out_pfn_list; From d274cde0dbe0217ee2f2ddbb1a3c545dedf81a06 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Wed, 9 Apr 2025 14:22:30 +0200 Subject: [PATCH 067/101] x86/cacheinfo: Properly parse CPUID(0x80000005) L1d/L1i associativity For the AMD CPUID(4) emulation cache info logic, the same associativity mapping array, assocs[], is used for both CPUID(0x80000005) and CPUID(0x80000006). This is incorrect since per the AMD manuals, the mappings for CPUID(0x80000005) L1d/L1i associativity is: n = 0x1 -> 0xfe n n = 0xff fully associative while assocs[] maps these values to: n = 0x1, 0x2, 0x4 n n = 0x3, 0x7, 0x9 0 n = 0x6 8 n = 0x8 16 n = 0xa 32 n = 0xb 48 n = 0xc 64 n = 0xd 96 n = 0xe 128 n = 0xf fully associative which is only valid for CPUID(0x80000006). Parse CPUID(0x80000005) L1d/L1i associativity values as shown in the AMD manuals. Since the 0xffff literal is used to denote full associativity at the AMD CPUID(4)-emulation logic, define AMD_CPUID4_FULLY_ASSOCIATIVE for it instead of spreading that literal in more places. Mark the assocs[] mapping array as only valid for CPUID(0x80000006) L2/L3 cache information. Fixes: a326e948c538 ("x86, cacheinfo: Fixup L3 cache information for AMD multi-node processors") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: "H. Peter Anvin" Cc: John Ogness Cc: x86-cpuid@lists.linux.dev Link: https://lore.kernel.org/r/20250409122233.1058601-2-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index cd48d34ac04b..f4817cd50cfb 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -91,6 +91,8 @@ static const enum cache_type cache_type_map[] = { * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d) */ +#define AMD_CPUID4_FULLY_ASSOCIATIVE 0xffff + union l1_cache { struct { unsigned line_size :8; @@ -122,6 +124,7 @@ union l3_cache { unsigned int val; }; +/* L2/L3 associativity mapping */ static const unsigned short assocs[] = { [1] = 1, [2] = 2, @@ -133,7 +136,7 @@ static const unsigned short assocs[] = { [0xc] = 64, [0xd] = 96, [0xe] = 128, - [0xf] = 0xffff /* Fully associative */ + [0xf] = AMD_CPUID4_FULLY_ASSOCIATIVE }; static const unsigned char levels[] = { 1, 1, 2, 3 }; @@ -163,7 +166,7 @@ static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, if (!l1->val) return; - assoc = assocs[l1->assoc]; + assoc = (l1->assoc == 0xff) ? AMD_CPUID4_FULLY_ASSOCIATIVE : l1->assoc; line_size = l1->line_size; lines_per_tag = l1->lines_per_tag; size_in_kb = l1->size_in_kb; @@ -201,7 +204,7 @@ static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, eax->split.num_threads_sharing = 0; eax->split.num_cores_on_die = topology_num_cores_per_package(); - if (assoc == 0xffff) + if (assoc == AMD_CPUID4_FULLY_ASSOCIATIVE) eax->split.is_fully_associative = 1; ebx->split.coherency_line_size = line_size - 1; From d02c83d75f9f76dda046edbd9f39b911427677c9 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Wed, 9 Apr 2025 14:22:31 +0200 Subject: [PATCH 068/101] x86/cacheinfo: Properly parse CPUID(0x80000006) L2/L3 associativity Complete the AMD CPUID(4) emulation logic, which uses CPUID(0x80000006) for L2/L3 cache info and an assocs[] associativity mapping array, by adding entries for 3-way caches and 6-way caches. Properly handle the case where CPUID(0x80000006) returns an L2/L3 associativity of 9. This is not real associativity, but a marker to indicate that the respective L2/L3 cache information should be retrieved from CPUID(0x8000001d) instead. If such a marker is encountered, return early from legacy_amd_cpuid4(), thus effectively emulating an "invalid index" CPUID(4) response with a cache type of zero. When checking if CPUID(0x80000006) L2/L3 cache info output is valid, and given the associtivity marker 9 above, do not just check if the whole ECX/EDX register is zero. Rather, check if the associativity is zero or 9. An associativity of zero implies no L2/L3 cache, which make it the more correct check anyway vs. a zero check of the whole output register. Fixes: a326e948c538 ("x86, cacheinfo: Fixup L3 cache information for AMD multi-node processors") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: "H. Peter Anvin" Cc: John Ogness Cc: x86-cpuid@lists.linux.dev Link: https://lore.kernel.org/r/20250409122233.1058601-3-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index f4817cd50cfb..52727f8c0006 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -89,9 +89,13 @@ static const enum cache_type cache_type_map[] = { /* * Fallback AMD CPUID(4) emulation * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d) + * + * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should + * be determined from CPUID(0x8000001d) instead of CPUID(0x80000006). */ #define AMD_CPUID4_FULLY_ASSOCIATIVE 0xffff +#define AMD_L2_L3_INVALID_ASSOC 0x9 union l1_cache { struct { @@ -128,7 +132,9 @@ union l3_cache { static const unsigned short assocs[] = { [1] = 1, [2] = 2, + [3] = 3, [4] = 4, + [5] = 6, [6] = 8, [8] = 16, [0xa] = 32, @@ -172,7 +178,7 @@ static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, size_in_kb = l1->size_in_kb; break; case 2: - if (!l2.val) + if (!l2.assoc || l2.assoc == AMD_L2_L3_INVALID_ASSOC) return; /* Use x86_cache_size as it might have K7 errata fixes */ @@ -182,7 +188,7 @@ static void legacy_amd_cpuid4(int index, union _cpuid4_leaf_eax *eax, size_in_kb = __this_cpu_read(cpu_info.x86_cache_size); break; case 3: - if (!l3.val) + if (!l3.assoc || l3.assoc == AMD_L2_L3_INVALID_ASSOC) return; assoc = assocs[l3.assoc]; From ddc592972ff4f1350f456edc3047fc5fb01777aa Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:16 -0700 Subject: [PATCH 069/101] tools headers: Update the KVM headers with the kernel sources To pick up the changes in: af5366bea2cb9dfb KVM: x86: Drop the now unused KVM_X86_DISABLE_VALID_EXITS 915d2f0718a42ee0 KVM: Move KVM_REG_SIZE() definition to common uAPI header 5c17848134ab1ffb KVM: x86/xen: Restrict hypercall MSR to unofficial synthetic range 9364789567f9b492 KVM: x86: Add a VM type define for TDX fa662c9080732b1f KVM: SVM: Add Idle HLT intercept support 3adaee78306148da KVM: arm64: Allow userspace to change the implementation ID registers faf7714a47a25c62 KVM: arm64: nv: Allow userland to set VGIC maintenance IRQ c0000e58c74eed07 KVM: arm64: Introduce KVM_REG_ARM_VENDOR_HYP_BMAP_2 f83c41fb3dddbf47 KVM: arm64: Allow userspace to limit NV support to nVHE Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/kvm.h include/uapi/linux/kvm.h diff -u tools/arch/x86/include/uapi/asm/kvm.h arch/x86/include/uapi/asm/kvm.h diff -u tools/arch/x86/include/uapi/asm/svm.h arch/x86/include/uapi/asm/svm.h diff -u tools/arch/arm64/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: kvm@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-2-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/uapi/asm/kvm.h | 5 ++--- tools/arch/x86/include/uapi/asm/kvm.h | 4 ++++ tools/arch/x86/include/uapi/asm/svm.h | 2 ++ tools/include/uapi/linux/kvm.h | 9 +++++---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 6d44f8c8a18f..af9d9acaf997 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -43,9 +43,6 @@ #define KVM_COALESCED_MMIO_PAGE_OFFSET 1 #define KVM_DIRTY_LOG_PAGE_OFFSET 64 -#define KVM_REG_SIZE(id) \ - (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) - struct kvm_regs { struct user_pt_regs regs; /* sp = sp_el0 */ @@ -108,6 +105,7 @@ struct kvm_regs { #define KVM_ARM_VCPU_PTRAUTH_ADDRESS 5 /* VCPU uses address authentication */ #define KVM_ARM_VCPU_PTRAUTH_GENERIC 6 /* VCPU uses generic authentication */ #define KVM_ARM_VCPU_HAS_EL2 7 /* Support nested virtualization */ +#define KVM_ARM_VCPU_HAS_EL2_E2H0 8 /* Limit NV support to E2H RES0 */ struct kvm_vcpu_init { __u32 target; @@ -418,6 +416,7 @@ enum { #define KVM_DEV_ARM_VGIC_GRP_CPU_SYSREGS 6 #define KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO 7 #define KVM_DEV_ARM_VGIC_GRP_ITS_REGS 8 +#define KVM_DEV_ARM_VGIC_GRP_MAINT_IRQ 9 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT 10 #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_MASK \ (0x3fffffULL << KVM_DEV_ARM_VGIC_LINE_LEVEL_INFO_SHIFT) diff --git a/tools/arch/x86/include/uapi/asm/kvm.h b/tools/arch/x86/include/uapi/asm/kvm.h index 88585c1de416..460306b35a4b 100644 --- a/tools/arch/x86/include/uapi/asm/kvm.h +++ b/tools/arch/x86/include/uapi/asm/kvm.h @@ -559,6 +559,9 @@ struct kvm_x86_mce { #define KVM_XEN_HVM_CONFIG_PVCLOCK_TSC_UNSTABLE (1 << 7) #define KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA (1 << 8) +#define KVM_XEN_MSR_MIN_INDEX 0x40000000u +#define KVM_XEN_MSR_MAX_INDEX 0x4fffffffu + struct kvm_xen_hvm_config { __u32 flags; __u32 msr; @@ -925,5 +928,6 @@ struct kvm_hyperv_eventfd { #define KVM_X86_SEV_VM 2 #define KVM_X86_SEV_ES_VM 3 #define KVM_X86_SNP_VM 4 +#define KVM_X86_TDX_VM 5 #endif /* _ASM_X86_KVM_H */ diff --git a/tools/arch/x86/include/uapi/asm/svm.h b/tools/arch/x86/include/uapi/asm/svm.h index 1814b413fd57..ec1321248dac 100644 --- a/tools/arch/x86/include/uapi/asm/svm.h +++ b/tools/arch/x86/include/uapi/asm/svm.h @@ -95,6 +95,7 @@ #define SVM_EXIT_CR14_WRITE_TRAP 0x09e #define SVM_EXIT_CR15_WRITE_TRAP 0x09f #define SVM_EXIT_INVPCID 0x0a2 +#define SVM_EXIT_IDLE_HLT 0x0a6 #define SVM_EXIT_NPF 0x400 #define SVM_EXIT_AVIC_INCOMPLETE_IPI 0x401 #define SVM_EXIT_AVIC_UNACCELERATED_ACCESS 0x402 @@ -224,6 +225,7 @@ { SVM_EXIT_CR4_WRITE_TRAP, "write_cr4_trap" }, \ { SVM_EXIT_CR8_WRITE_TRAP, "write_cr8_trap" }, \ { SVM_EXIT_INVPCID, "invpcid" }, \ + { SVM_EXIT_IDLE_HLT, "idle-halt" }, \ { SVM_EXIT_NPF, "npf" }, \ { SVM_EXIT_AVIC_INCOMPLETE_IPI, "avic_incomplete_ipi" }, \ { SVM_EXIT_AVIC_UNACCELERATED_ACCESS, "avic_unaccelerated_access" }, \ diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 502ea63b5d2e..b6ae8ad8934b 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -617,10 +617,6 @@ struct kvm_ioeventfd { #define KVM_X86_DISABLE_EXITS_HLT (1 << 1) #define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) #define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3) -#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ - KVM_X86_DISABLE_EXITS_HLT | \ - KVM_X86_DISABLE_EXITS_PAUSE | \ - KVM_X86_DISABLE_EXITS_CSTATE) /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { @@ -933,6 +929,7 @@ struct kvm_enable_cap { #define KVM_CAP_PRE_FAULT_MEMORY 236 #define KVM_CAP_X86_APIC_BUS_CYCLES_NS 237 #define KVM_CAP_X86_GUEST_MODE 238 +#define KVM_CAP_ARM_WRITABLE_IMP_ID_REGS 239 struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1070,6 +1067,10 @@ struct kvm_dirty_tlb { #define KVM_REG_SIZE_SHIFT 52 #define KVM_REG_SIZE_MASK 0x00f0000000000000ULL + +#define KVM_REG_SIZE(id) \ + (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT)) + #define KVM_REG_SIZE_U8 0x0000000000000000ULL #define KVM_REG_SIZE_U16 0x0010000000000000ULL #define KVM_REG_SIZE_U32 0x0020000000000000ULL From 9dbe66640f43a3530a0e7897557f4ea41c3abe85 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:17 -0700 Subject: [PATCH 070/101] tools headers: Update the socket headers with the kernel sources To pick up the changes in: 64e844505bc08cde include: uapi: protocol number and packet structs for AGGFRAG in ESP 18912c520674ec4d tcp: devmem: don't write truncated dmabuf CMSGs to userspace Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/in.h include/uapi/linux/in.h diff -u tools/perf/trace/beauty/include/linux/socket.h include/linux/socket.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: netdev@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-3-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/in.h | 2 ++ tools/perf/trace/beauty/include/linux/socket.h | 2 ++ 2 files changed, 4 insertions(+) diff --git a/tools/include/uapi/linux/in.h b/tools/include/uapi/linux/in.h index 5d32d53508d9..ced0fc3c3aa5 100644 --- a/tools/include/uapi/linux/in.h +++ b/tools/include/uapi/linux/in.h @@ -79,6 +79,8 @@ enum { #define IPPROTO_MPLS IPPROTO_MPLS IPPROTO_ETHERNET = 143, /* Ethernet-within-IPv6 Encapsulation */ #define IPPROTO_ETHERNET IPPROTO_ETHERNET + IPPROTO_AGGFRAG = 144, /* AGGFRAG in ESP (RFC 9347) */ +#define IPPROTO_AGGFRAG IPPROTO_AGGFRAG IPPROTO_RAW = 255, /* Raw IP packets */ #define IPPROTO_RAW IPPROTO_RAW IPPROTO_SMC = 256, /* Shared Memory Communications */ diff --git a/tools/perf/trace/beauty/include/linux/socket.h b/tools/perf/trace/beauty/include/linux/socket.h index d18cc47e89bd..c3322eb3d686 100644 --- a/tools/perf/trace/beauty/include/linux/socket.h +++ b/tools/perf/trace/beauty/include/linux/socket.h @@ -392,6 +392,8 @@ struct ucred { extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); +extern int put_cmsg_notrunc(struct msghdr *msg, int level, int type, int len, + void *data); struct timespec64; struct __kernel_timespec; From ae62977331fcbf5c9a4260c88d9f94450db2d99a Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:18 -0700 Subject: [PATCH 071/101] tools headers: Update the uapi/linux/perf_event.h copy with the kernel sources To pick up the changes in: c53e14f1ea4a8f8d perf: Extend per event callchain limit to branch stack Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/perf_event.h include/uapi/linux/perf_event.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Link: https://lore.kernel.org/r/20250410001125.391820-4-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 0524d541d4e3..5fc753c23734 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -385,6 +385,8 @@ enum perf_event_read_format { * * @sample_max_stack: Max number of frame pointers in a callchain, * should be < /proc/sys/kernel/perf_event_max_stack + * Max number of entries of branch stack + * should be < hardware limit */ struct perf_event_attr { From af74e5fe7453c1cb2b86c601426cfc4ad9ea9753 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:19 -0700 Subject: [PATCH 072/101] tools headers: Update the VFS headers with the kernel sources To pick up the changes in: 7ed6cbe0f8caa6ee fs: add STATX_DIO_READ_ALIGN 8fc7e23a9bd851e6 fs: reformat the statx definition a5874fde3c0884a3 exec: Add a new AT_EXECVE_CHECK flag to execveat(2) 1ebd4a3c095cd538 blk-crypto: add ioctls to create and prepare hardware-wrapped keys af6505e5745b9f3a fs: add RWF_DONTCACHE iocb and FOP_DONTCACHE file_operations flag 10783d0ba0d7731e fs, iov_iter: define meta io descriptor 8f6116b5b77b0536 statmount: add a new supported_mask field 37c4a9590e1efcae statmount: allow to retrieve idmappings Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/stat.h include/uapi/linux/stat.h diff -u tools/perf/trace/beauty/include/uapi/linux/fcntl.h include/uapi/linux/fcntl.h diff -u tools/perf/trace/beauty/include/uapi/linux/fs.h include/uapi/linux/fs.h diff -u tools/perf/trace/beauty/include/uapi/linux/mount.h include/uapi/linux/mount.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-fsdevel@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-5-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/linux/stat.h | 99 ++++++++++++++----- .../trace/beauty/include/uapi/linux/fcntl.h | 4 + .../perf/trace/beauty/include/uapi/linux/fs.h | 21 +++- .../trace/beauty/include/uapi/linux/mount.h | 10 +- .../trace/beauty/include/uapi/linux/stat.h | 99 ++++++++++++++----- 5 files changed, 179 insertions(+), 54 deletions(-) diff --git a/tools/include/uapi/linux/stat.h b/tools/include/uapi/linux/stat.h index 887a25286441..f78ee3670dd5 100644 --- a/tools/include/uapi/linux/stat.h +++ b/tools/include/uapi/linux/stat.h @@ -98,43 +98,93 @@ struct statx_timestamp { */ struct statx { /* 0x00 */ - __u32 stx_mask; /* What results were written [uncond] */ - __u32 stx_blksize; /* Preferred general I/O size [uncond] */ - __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* What results were written [uncond] */ + __u32 stx_mask; + + /* Preferred general I/O size [uncond] */ + __u32 stx_blksize; + + /* Flags conveying information about the file [uncond] */ + __u64 stx_attributes; + /* 0x10 */ - __u32 stx_nlink; /* Number of hard links */ - __u32 stx_uid; /* User ID of owner */ - __u32 stx_gid; /* Group ID of owner */ - __u16 stx_mode; /* File mode */ + /* Number of hard links */ + __u32 stx_nlink; + + /* User ID of owner */ + __u32 stx_uid; + + /* Group ID of owner */ + __u32 stx_gid; + + /* File mode */ + __u16 stx_mode; __u16 __spare0[1]; + /* 0x20 */ - __u64 stx_ino; /* Inode number */ - __u64 stx_size; /* File size */ - __u64 stx_blocks; /* Number of 512-byte blocks allocated */ - __u64 stx_attributes_mask; /* Mask to show what's supported in stx_attributes */ + /* Inode number */ + __u64 stx_ino; + + /* File size */ + __u64 stx_size; + + /* Number of 512-byte blocks allocated */ + __u64 stx_blocks; + + /* Mask to show what's supported in stx_attributes */ + __u64 stx_attributes_mask; + /* 0x40 */ - struct statx_timestamp stx_atime; /* Last access time */ - struct statx_timestamp stx_btime; /* File creation time */ - struct statx_timestamp stx_ctime; /* Last attribute change time */ - struct statx_timestamp stx_mtime; /* Last data modification time */ + /* Last access time */ + struct statx_timestamp stx_atime; + + /* File creation time */ + struct statx_timestamp stx_btime; + + /* Last attribute change time */ + struct statx_timestamp stx_ctime; + + /* Last data modification time */ + struct statx_timestamp stx_mtime; + /* 0x80 */ - __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_major; __u32 stx_rdev_minor; - __u32 stx_dev_major; /* ID of device containing file [uncond] */ + + /* ID of device containing file [uncond] */ + __u32 stx_dev_major; __u32 stx_dev_minor; + /* 0x90 */ __u64 stx_mnt_id; - __u32 stx_dio_mem_align; /* Memory buffer alignment for direct I/O */ - __u32 stx_dio_offset_align; /* File offset alignment for direct I/O */ + + /* Memory buffer alignment for direct I/O */ + __u32 stx_dio_mem_align; + + /* File offset alignment for direct I/O */ + __u32 stx_dio_offset_align; + /* 0xa0 */ - __u64 stx_subvol; /* Subvolume identifier */ - __u32 stx_atomic_write_unit_min; /* Min atomic write unit in bytes */ - __u32 stx_atomic_write_unit_max; /* Max atomic write unit in bytes */ + /* Subvolume identifier */ + __u64 stx_subvol; + + /* Min atomic write unit in bytes */ + __u32 stx_atomic_write_unit_min; + + /* Max atomic write unit in bytes */ + __u32 stx_atomic_write_unit_max; + /* 0xb0 */ - __u32 stx_atomic_write_segments_max; /* Max atomic write segment count */ - __u32 __spare1[1]; + /* Max atomic write segment count */ + __u32 stx_atomic_write_segments_max; + + /* File offset alignment for direct I/O reads */ + __u32 stx_dio_read_offset_align; + /* 0xb8 */ __u64 __spare3[9]; /* Spare space for future expansion */ + /* 0x100 */ }; @@ -164,6 +214,7 @@ struct statx { #define STATX_MNT_ID_UNIQUE 0x00004000U /* Want/got extended stx_mount_id */ #define STATX_SUBVOL 0x00008000U /* Want/got stx_subvol */ #define STATX_WRITE_ATOMIC 0x00010000U /* Want/got atomic_write_* fields */ +#define STATX_DIO_READ_ALIGN 0x00020000U /* Want/got dio read alignment info */ #define STATX__RESERVED 0x80000000U /* Reserved for future struct statx expansion */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h index 6e6907e63bfc..a15ac2fa4b20 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fcntl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fcntl.h @@ -155,4 +155,8 @@ #define AT_HANDLE_MNT_ID_UNIQUE 0x001 /* Return the u64 unique mount ID. */ #define AT_HANDLE_CONNECTABLE 0x002 /* Request a connectable file handle */ +/* Flags for execveat2(2). */ +#define AT_EXECVE_CHECK 0x10000 /* Only perform a check if execution + would be allowed. */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/tools/perf/trace/beauty/include/uapi/linux/fs.h b/tools/perf/trace/beauty/include/uapi/linux/fs.h index 753971770733..e762e1af650c 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/fs.h +++ b/tools/perf/trace/beauty/include/uapi/linux/fs.h @@ -40,6 +40,15 @@ #define BLOCK_SIZE_BITS 10 #define BLOCK_SIZE (1< Date: Wed, 9 Apr 2025 17:11:20 -0700 Subject: [PATCH 073/101] tools headers: Update the syscall table with the kernel sources To pick up the changes in: c4a16820d9019940 fs: add open_tree_attr() 2df1ad0d25803399 x86/arch_prctl: Simplify sys_arch_prctl() e632bca07c8eef1d arm64: generate 64-bit syscall.tbl This is basically to support the new open_tree_attr syscall. But it also needs to update asm-generic unistd.h header to get the new syscall number. And arm64 unistd.h header was converted to use the generic 64-bit header. Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/scripts/syscall.tbl scripts/syscall.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_32.tbl arch/x86/entry/syscalls/syscall_32.tbl diff -u tools/perf/arch/x86/entry/syscalls/syscall_64.tbl arch/x86/entry/syscalls/syscall_64.tbl diff -u tools/perf/arch/powerpc/entry/syscalls/syscall.tbl arch/powerpc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/s390/entry/syscalls/syscall.tbl arch/s390/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl arch/mips/kernel/syscalls/syscall_n64.tbl diff -u tools/perf/arch/arm/entry/syscalls/syscall.tbl arch/arm/tools/syscall.tbl diff -u tools/perf/arch/sh/entry/syscalls/syscall.tbl arch/sh/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/sparc/entry/syscalls/syscall.tbl arch/sparc/kernel/syscalls/syscall.tbl diff -u tools/perf/arch/xtensa/entry/syscalls/syscall.tbl arch/xtensa/kernel/syscalls/syscall.tbl diff -u tools/arch/arm64/include/uapi/asm/unistd.h arch/arm64/include/uapi/asm/unistd.h diff -u tools/include/uapi/asm-generic/unistd.h include/uapi/asm-generic/unistd.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-6-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/arm64/include/uapi/asm/unistd.h | 24 +------------------ tools/include/uapi/asm-generic/unistd.h | 4 +++- .../perf/arch/arm/entry/syscalls/syscall.tbl | 1 + .../arch/mips/entry/syscalls/syscall_n64.tbl | 1 + .../arch/powerpc/entry/syscalls/syscall.tbl | 1 + .../perf/arch/s390/entry/syscalls/syscall.tbl | 1 + tools/perf/arch/sh/entry/syscalls/syscall.tbl | 1 + .../arch/sparc/entry/syscalls/syscall.tbl | 1 + .../arch/x86/entry/syscalls/syscall_32.tbl | 3 ++- .../arch/x86/entry/syscalls/syscall_64.tbl | 1 + .../arch/xtensa/entry/syscalls/syscall.tbl | 1 + tools/scripts/syscall.tbl | 1 + 12 files changed, 15 insertions(+), 25 deletions(-) diff --git a/tools/arch/arm64/include/uapi/asm/unistd.h b/tools/arch/arm64/include/uapi/asm/unistd.h index 9306726337fe..df36f23876e8 100644 --- a/tools/arch/arm64/include/uapi/asm/unistd.h +++ b/tools/arch/arm64/include/uapi/asm/unistd.h @@ -1,24 +1,2 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#define __ARCH_WANT_RENAMEAT -#define __ARCH_WANT_NEW_STAT -#define __ARCH_WANT_SET_GET_RLIMIT -#define __ARCH_WANT_TIME32_SYSCALLS -#define __ARCH_WANT_MEMFD_SECRET - -#include +#include diff --git a/tools/include/uapi/asm-generic/unistd.h b/tools/include/uapi/asm-generic/unistd.h index 88dc393c2bca..2892a45023af 100644 --- a/tools/include/uapi/asm-generic/unistd.h +++ b/tools/include/uapi/asm-generic/unistd.h @@ -849,9 +849,11 @@ __SYSCALL(__NR_getxattrat, sys_getxattrat) __SYSCALL(__NR_listxattrat, sys_listxattrat) #define __NR_removexattrat 466 __SYSCALL(__NR_removexattrat, sys_removexattrat) +#define __NR_open_tree_attr 467 +__SYSCALL(__NR_open_tree_attr, sys_open_tree_attr) #undef __NR_syscalls -#define __NR_syscalls 467 +#define __NR_syscalls 468 /* * 32 bit systems traditionally used different diff --git a/tools/perf/arch/arm/entry/syscalls/syscall.tbl b/tools/perf/arch/arm/entry/syscalls/syscall.tbl index 49eeb2ad8dbd..27c1d5ebcd91 100644 --- a/tools/perf/arch/arm/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/arm/entry/syscalls/syscall.tbl @@ -481,3 +481,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl index c844cd5cda62..1e8c44c7b614 100644 --- a/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl +++ b/tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl @@ -381,3 +381,4 @@ 464 n64 getxattrat sys_getxattrat 465 n64 listxattrat sys_listxattrat 466 n64 removexattrat sys_removexattrat +467 n64 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl index d8b4ab78bef0..9a084bdb8926 100644 --- a/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/powerpc/entry/syscalls/syscall.tbl @@ -557,3 +557,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/s390/entry/syscalls/syscall.tbl b/tools/perf/arch/s390/entry/syscalls/syscall.tbl index e9115b4d8b63..a4569b96ef06 100644 --- a/tools/perf/arch/s390/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/s390/entry/syscalls/syscall.tbl @@ -469,3 +469,4 @@ 464 common getxattrat sys_getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sh/entry/syscalls/syscall.tbl b/tools/perf/arch/sh/entry/syscalls/syscall.tbl index c8cad33bf250..52a7652fcff6 100644 --- a/tools/perf/arch/sh/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sh/entry/syscalls/syscall.tbl @@ -470,3 +470,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl index 727f99d333b3..83e45eb6c095 100644 --- a/tools/perf/arch/sparc/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/sparc/entry/syscalls/syscall.tbl @@ -512,3 +512,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl index 4d0fb2fba7e2..ac007ea00979 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_32.tbl @@ -396,7 +396,7 @@ 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free 383 i386 statx sys_statx -384 i386 arch_prctl sys_arch_prctl compat_sys_arch_prctl +384 i386 arch_prctl sys_arch_prctl 385 i386 io_pgetevents sys_io_pgetevents_time32 compat_sys_io_pgetevents 386 i386 rseq sys_rseq 393 i386 semget sys_semget @@ -472,3 +472,4 @@ 464 i386 getxattrat sys_getxattrat 465 i386 listxattrat sys_listxattrat 466 i386 removexattrat sys_removexattrat +467 i386 open_tree_attr sys_open_tree_attr diff --git a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl index 5eb708bff1c7..cfb5ca41e30d 100644 --- a/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl +++ b/tools/perf/arch/x86/entry/syscalls/syscall_64.tbl @@ -390,6 +390,7 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr # # Due to a historical design error, certain syscalls are numbered differently diff --git a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl index 37effc1b134e..f657a77314f8 100644 --- a/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl +++ b/tools/perf/arch/xtensa/entry/syscalls/syscall.tbl @@ -437,3 +437,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr diff --git a/tools/scripts/syscall.tbl b/tools/scripts/syscall.tbl index ebbdb3c42e9f..580b4e246aec 100644 --- a/tools/scripts/syscall.tbl +++ b/tools/scripts/syscall.tbl @@ -407,3 +407,4 @@ 464 common getxattrat sys_getxattrat 465 common listxattrat sys_listxattrat 466 common removexattrat sys_removexattrat +467 common open_tree_attr sys_open_tree_attr From df4bd8c76d49cf5948d63987f4a795c544155906 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:21 -0700 Subject: [PATCH 074/101] tools headers: Update the uapi/linux/prctl.h copy with the kernel sources To pick up the changes in: ec2d0c04624b3c8a posix-timers: Provide a mechanism to allocate a given timer ID Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/linux/prctl.h include/uapi/linux/prctl.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: Thomas Gleixner Link: https://lore.kernel.org/r/20250410001125.391820-7-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/linux/prctl.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/tools/perf/trace/beauty/include/uapi/linux/prctl.h b/tools/perf/trace/beauty/include/uapi/linux/prctl.h index 5c6080680cb2..15c18ef4eb11 100644 --- a/tools/perf/trace/beauty/include/uapi/linux/prctl.h +++ b/tools/perf/trace/beauty/include/uapi/linux/prctl.h @@ -353,4 +353,15 @@ struct prctl_mm_map { */ #define PR_LOCK_SHADOW_STACK_STATUS 76 +/* + * Controls the mode of timer_create() for CRIU restore operations. + * Enabling this allows CRIU to restore timers with explicit IDs. + * + * Don't use for normal operations as the result might be undefined. + */ +#define PR_TIMER_CREATE_RESTORE_IDS 77 +# define PR_TIMER_CREATE_RESTORE_IDS_OFF 0 +# define PR_TIMER_CREATE_RESTORE_IDS_ON 1 +# define PR_TIMER_CREATE_RESTORE_IDS_GET 2 + #endif /* _LINUX_PRCTL_H */ From 4056cf407253ac81fc960088acfe9579496a871f Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:22 -0700 Subject: [PATCH 075/101] tools headers: Update the uapi/asm-generic/mman-common.h copy with the kernel sources To pick up the changes in: 6d61527d931ba07b mm/pkey: Add PKEY_UNRESTRICTED macro Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/include/uapi/asm-generic/mman-common.h include/uapi/asm-generic/mman-common.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-arch@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-8-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/include/uapi/asm-generic/mman-common.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/include/uapi/asm-generic/mman-common.h b/tools/include/uapi/asm-generic/mman-common.h index 1ea2c4c33b86..ef1c27fa3c57 100644 --- a/tools/include/uapi/asm-generic/mman-common.h +++ b/tools/include/uapi/asm-generic/mman-common.h @@ -85,6 +85,7 @@ /* compatibility flags */ #define MAP_FILE 0 +#define PKEY_UNRESTRICTED 0x0 #define PKEY_DISABLE_ACCESS 0x1 #define PKEY_DISABLE_WRITE 0x2 #define PKEY_ACCESS_MASK (PKEY_DISABLE_ACCESS |\ From 74709981873d2baa5573735b1f24108206d0197e Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:23 -0700 Subject: [PATCH 076/101] tools headers: Update the linux/unaligned.h copy with the kernel sources To pick up the changes in: 3846699217798061 ALSA: rawmidi: Make tied_device=0 as default / unknown 7bb49d2e8b52adac ALSA: rawmidi: Bump protocol version to 2.0.5 b8fefed73a952a33 ALSA: rawmidi: Show substream activity in info ioctl bdf46443f350dd5d ALSA: rawmidi: Expose the tied device number in info ioctl Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/perf/trace/beauty/include/uapi/sound/asound.h include/uapi/sound/asound.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: linux-sound@vger.kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-9-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/trace/beauty/include/uapi/sound/asound.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tools/perf/trace/beauty/include/uapi/sound/asound.h b/tools/perf/trace/beauty/include/uapi/sound/asound.h index 4cd513215bcd..5a049eeaecce 100644 --- a/tools/perf/trace/beauty/include/uapi/sound/asound.h +++ b/tools/perf/trace/beauty/include/uapi/sound/asound.h @@ -716,7 +716,7 @@ enum { * Raw MIDI section - /dev/snd/midi?? */ -#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 4) +#define SNDRV_RAWMIDI_VERSION SNDRV_PROTOCOL_VERSION(2, 0, 5) enum { SNDRV_RAWMIDI_STREAM_OUTPUT = 0, @@ -728,6 +728,9 @@ enum { #define SNDRV_RAWMIDI_INFO_INPUT 0x00000002 #define SNDRV_RAWMIDI_INFO_DUPLEX 0x00000004 #define SNDRV_RAWMIDI_INFO_UMP 0x00000008 +#define SNDRV_RAWMIDI_INFO_STREAM_INACTIVE 0x00000010 + +#define SNDRV_RAWMIDI_DEVICE_UNKNOWN 0 struct snd_rawmidi_info { unsigned int device; /* RO/WR (control): device number */ @@ -740,7 +743,8 @@ struct snd_rawmidi_info { unsigned char subname[32]; /* name of active or selected subdevice */ unsigned int subdevices_count; unsigned int subdevices_avail; - unsigned char reserved[64]; /* reserved for future use */ + int tied_device; /* R: tied rawmidi device (UMP/legacy) */ + unsigned char reserved[60]; /* reserved for future use */ }; #define SNDRV_RAWMIDI_MODE_FRAMING_MASK (7<<0) From 847f1403d3ee51278dfbece84ec7f199de43daa5 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:24 -0700 Subject: [PATCH 077/101] tools headers: Update the x86 headers with the kernel sources To pick up the changes in: 841326332bcb13ae x86/cpufeatures: Generate the header based on build config 440a65b7d25fb06f x86/mm: Enable AMD translation cache extensions 767ae437a32d6447 x86/mm: Add INVLPGB feature and Kconfig entry b4cc466b97359011 cpufreq/amd-pstate: Replace all AMD_CPPC_* macros with masks 98c7a713db91c5a9 x86/bugs: Add X86_BUG_SPECTRE_V2_USER 8f64eee70cdd3bb8 x86/bugs: Remove X86_FEATURE_USE_IBPB 8442df2b49ed9bcd x86/bugs: KVM: Add support for SRSO_MSR_FIX 70792aed14551e31 x86/cpufeatures: Add CPUID feature bit for Idle HLT intercept 968e9bc4cef87054 x86: move ZMM exclusion list into CPU feature flag c631a2de7ae48d50 perf/x86/intel: Ensure LBRs are disabled when a CPU is starting 38cc6495cdec18a4 x86/sev: Prevent GUEST_TSC_FREQ MSR interception for Secure TSC enabled guests 288bba2f4c8be1e1 x86/cpufeatures: Remove "AMD" from the comments to the AMD-specific leaf 877818802c3e970f x86/bugs: Add SRSO_USER_KERNEL_NO support 8ae3291f773befee x86/sev: Add full support for a segmented RMP table 0cbc0258415814c8 x86/sev: Add support for the RMPREAD instruction 7a470e826d7521be x86/cpufeatures: Free up unused feature bits Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/cpufeatures.h diff -u tools/arch/x86/include/asm/msr-index.h arch/x86/include/asm/msr-index.h Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: x86@kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-10-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/include/asm/cpufeatures.h | 28 +++++++++++++++------ tools/arch/x86/include/asm/msr-index.h | 31 +++++++++++++++--------- 2 files changed, 39 insertions(+), 20 deletions(-) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 9e3fa7942e7d..6c2c152d8a67 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -75,8 +75,8 @@ #define X86_FEATURE_CENTAUR_MCR ( 3*32+ 3) /* "centaur_mcr" Centaur MCRs (= MTRRs) */ #define X86_FEATURE_K8 ( 3*32+ 4) /* Opteron, Athlon64 */ #define X86_FEATURE_ZEN5 ( 3*32+ 5) /* CPU based on Zen5 microarchitecture */ -#define X86_FEATURE_P3 ( 3*32+ 6) /* P3 */ -#define X86_FEATURE_P4 ( 3*32+ 7) /* P4 */ +/* Free ( 3*32+ 6) */ +/* Free ( 3*32+ 7) */ #define X86_FEATURE_CONSTANT_TSC ( 3*32+ 8) /* "constant_tsc" TSC ticks at a constant rate */ #define X86_FEATURE_UP ( 3*32+ 9) /* "up" SMP kernel running on UP */ #define X86_FEATURE_ART ( 3*32+10) /* "art" Always running timer (ART) */ @@ -329,6 +329,7 @@ #define X86_FEATURE_CLZERO (13*32+ 0) /* "clzero" CLZERO instruction */ #define X86_FEATURE_IRPERF (13*32+ 1) /* "irperf" Instructions Retired Count */ #define X86_FEATURE_XSAVEERPTR (13*32+ 2) /* "xsaveerptr" Always save/restore FP error pointers */ +#define X86_FEATURE_INVLPGB (13*32+ 3) /* INVLPGB and TLBSYNC instructions supported */ #define X86_FEATURE_RDPRU (13*32+ 4) /* "rdpru" Read processor register at user level */ #define X86_FEATURE_WBNOINVD (13*32+ 9) /* "wbnoinvd" WBNOINVD instruction */ #define X86_FEATURE_AMD_IBPB (13*32+12) /* Indirect Branch Prediction Barrier */ @@ -377,6 +378,7 @@ #define X86_FEATURE_V_SPEC_CTRL (15*32+20) /* "v_spec_ctrl" Virtual SPEC_CTRL */ #define X86_FEATURE_VNMI (15*32+25) /* "vnmi" Virtual NMI */ #define X86_FEATURE_SVME_ADDR_CHK (15*32+28) /* SVME addr check */ +#define X86_FEATURE_IDLE_HLT (15*32+30) /* IDLE HLT intercept */ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* "avx512vbmi" AVX512 Vector Bit Manipulation instructions*/ @@ -434,15 +436,18 @@ #define X86_FEATURE_SPEC_CTRL_SSBD (18*32+31) /* Speculative Store Bypass Disable */ /* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */ -#define X86_FEATURE_SME (19*32+ 0) /* "sme" AMD Secure Memory Encryption */ -#define X86_FEATURE_SEV (19*32+ 1) /* "sev" AMD Secure Encrypted Virtualization */ +#define X86_FEATURE_SME (19*32+ 0) /* "sme" Secure Memory Encryption */ +#define X86_FEATURE_SEV (19*32+ 1) /* "sev" Secure Encrypted Virtualization */ #define X86_FEATURE_VM_PAGE_FLUSH (19*32+ 2) /* VM Page Flush MSR is supported */ -#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" AMD Secure Encrypted Virtualization - Encrypted State */ -#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" AMD Secure Encrypted Virtualization - Secure Nested Paging */ +#define X86_FEATURE_SEV_ES (19*32+ 3) /* "sev_es" Secure Encrypted Virtualization - Encrypted State */ +#define X86_FEATURE_SEV_SNP (19*32+ 4) /* "sev_snp" Secure Encrypted Virtualization - Secure Nested Paging */ #define X86_FEATURE_V_TSC_AUX (19*32+ 9) /* Virtual TSC_AUX */ -#define X86_FEATURE_SME_COHERENT (19*32+10) /* AMD hardware-enforced cache coherency */ -#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" AMD SEV-ES full debug state swap support */ +#define X86_FEATURE_SME_COHERENT (19*32+10) /* hardware-enforced cache coherency */ +#define X86_FEATURE_DEBUG_SWAP (19*32+14) /* "debug_swap" SEV-ES full debug state swap support */ +#define X86_FEATURE_RMPREAD (19*32+21) /* RMPREAD instruction */ +#define X86_FEATURE_SEGMENTED_RMP (19*32+23) /* Segmented RMP support */ #define X86_FEATURE_SVSM (19*32+28) /* "svsm" SVSM present */ +#define X86_FEATURE_HV_INUSE_WR_ALLOWED (19*32+30) /* Allow Write to in-use hypervisor-owned pages */ /* AMD-defined Extended Feature 2 EAX, CPUID level 0x80000021 (EAX), word 20 */ #define X86_FEATURE_NO_NESTED_DATA_BP (20*32+ 0) /* No Nested Data Breakpoints */ @@ -455,6 +460,11 @@ #define X86_FEATURE_SBPB (20*32+27) /* Selective Branch Prediction Barrier */ #define X86_FEATURE_IBPB_BRTYPE (20*32+28) /* MSR_PRED_CMD[IBPB] flushes all branch type predictions */ #define X86_FEATURE_SRSO_NO (20*32+29) /* CPU is not affected by SRSO */ +#define X86_FEATURE_SRSO_USER_KERNEL_NO (20*32+30) /* CPU is not affected by SRSO across user/kernel boundaries */ +#define X86_FEATURE_SRSO_BP_SPEC_REDUCE (20*32+31) /* + * BP_CFG[BpSpecReduce] can be used to mitigate SRSO for VMs. + * (SRSO_MSR_FIX in the official doc). + */ /* * Extended auxiliary flags: Linux defined - for features scattered in various @@ -470,6 +480,7 @@ #define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -521,4 +532,5 @@ #define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ #define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ #define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/msr-index.h b/tools/arch/x86/include/asm/msr-index.h index dc1c1057f26e..e6134ef2263d 100644 --- a/tools/arch/x86/include/asm/msr-index.h +++ b/tools/arch/x86/include/asm/msr-index.h @@ -397,7 +397,8 @@ #define MSR_IA32_PASID_VALID BIT_ULL(31) /* DEBUGCTLMSR bits (others vary by model): */ -#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_LBR_BIT 0 /* last branch recording */ +#define DEBUGCTLMSR_LBR (1UL << DEBUGCTLMSR_LBR_BIT) #define DEBUGCTLMSR_BTF_SHIFT 1 #define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ #define DEBUGCTLMSR_BUS_LOCK_DETECT (1UL << 2) @@ -610,6 +611,7 @@ #define MSR_AMD_PERF_CTL 0xc0010062 #define MSR_AMD_PERF_STATUS 0xc0010063 #define MSR_AMD_PSTATE_DEF_BASE 0xc0010064 +#define MSR_AMD64_GUEST_TSC_FREQ 0xc0010134 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140 #define MSR_AMD64_OSVW_STATUS 0xc0010141 #define MSR_AMD_PPIN_CTL 0xc00102f0 @@ -646,6 +648,7 @@ #define MSR_AMD64_IBS_REG_COUNT_MAX 8 /* includes MSR_AMD64_IBSBRTARGET */ #define MSR_AMD64_SVM_AVIC_DOORBELL 0xc001011b #define MSR_AMD64_VM_PAGE_FLUSH 0xc001011e +#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f #define MSR_AMD64_SEV_ES_GHCB 0xc0010130 #define MSR_AMD64_SEV 0xc0010131 #define MSR_AMD64_SEV_ENABLED_BIT 0 @@ -684,11 +687,12 @@ #define MSR_AMD64_SNP_SMT_PROT BIT_ULL(MSR_AMD64_SNP_SMT_PROT_BIT) #define MSR_AMD64_SNP_RESV_BIT 18 #define MSR_AMD64_SNP_RESERVED_MASK GENMASK_ULL(63, MSR_AMD64_SNP_RESV_BIT) - -#define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f - #define MSR_AMD64_RMP_BASE 0xc0010132 #define MSR_AMD64_RMP_END 0xc0010133 +#define MSR_AMD64_RMP_CFG 0xc0010136 +#define MSR_AMD64_SEG_RMP_ENABLED_BIT 0 +#define MSR_AMD64_SEG_RMP_ENABLED BIT_ULL(MSR_AMD64_SEG_RMP_ENABLED_BIT) +#define MSR_AMD64_RMP_SEGMENT_SHIFT(x) (((x) & GENMASK_ULL(13, 8)) >> 8) #define MSR_SVSM_CAA 0xc001f000 @@ -699,15 +703,17 @@ #define MSR_AMD_CPPC_REQ 0xc00102b3 #define MSR_AMD_CPPC_STATUS 0xc00102b4 -#define AMD_CPPC_LOWEST_PERF(x) (((x) >> 0) & 0xff) -#define AMD_CPPC_LOWNONLIN_PERF(x) (((x) >> 8) & 0xff) -#define AMD_CPPC_NOMINAL_PERF(x) (((x) >> 16) & 0xff) -#define AMD_CPPC_HIGHEST_PERF(x) (((x) >> 24) & 0xff) +/* Masks for use with MSR_AMD_CPPC_CAP1 */ +#define AMD_CPPC_LOWEST_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_LOWNONLIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_NOMINAL_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_HIGHEST_PERF_MASK GENMASK(31, 24) -#define AMD_CPPC_MAX_PERF(x) (((x) & 0xff) << 0) -#define AMD_CPPC_MIN_PERF(x) (((x) & 0xff) << 8) -#define AMD_CPPC_DES_PERF(x) (((x) & 0xff) << 16) -#define AMD_CPPC_ENERGY_PERF_PREF(x) (((x) & 0xff) << 24) +/* Masks for use with MSR_AMD_CPPC_REQ */ +#define AMD_CPPC_MAX_PERF_MASK GENMASK(7, 0) +#define AMD_CPPC_MIN_PERF_MASK GENMASK(15, 8) +#define AMD_CPPC_DES_PERF_MASK GENMASK(23, 16) +#define AMD_CPPC_EPP_PERF_MASK GENMASK(31, 24) /* AMD Performance Counter Global Status and Control MSRs */ #define MSR_AMD64_PERF_CNTR_GLOBAL_STATUS 0xc0000300 @@ -719,6 +725,7 @@ /* Zen4 */ #define MSR_ZEN4_BP_CFG 0xc001102e +#define MSR_ZEN4_BP_CFG_BP_SPEC_REDUCE_BIT 4 #define MSR_ZEN4_BP_CFG_SHARED_BTB_FIX_BIT 5 /* Fam 19h MSRs */ From 7f56978e5876521eaa90fda0e63630fa64f69bce Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 17:11:25 -0700 Subject: [PATCH 078/101] tools headers: Update the arch/x86/lib/memset_64.S copy with the kernel sources To pick up the changes in: 2981557cb0408e14 x86,kcfi: Fix EXPORT_SYMBOL vs kCFI That required adding a copy of include/linux/cfi_types.h and its checking in tools/perf/check-headers.h. Addressing this perf tools build warning: Warning: Kernel ABI header differences: diff -u tools/arch/x86/lib/memset_64.S arch/x86/lib/memset_64.S Please see tools/include/uapi/README for further details. Acked-by: Ingo Molnar Tested-by: Venkat Rao Bagalkote Cc: x86@kernel.org Link: https://lore.kernel.org/r/20250410001125.391820-11-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/arch/x86/lib/memset_64.S | 3 ++- tools/include/linux/cfi_types.h | 45 +++++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 3 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 tools/include/linux/cfi_types.h diff --git a/tools/arch/x86/lib/memset_64.S b/tools/arch/x86/lib/memset_64.S index 0199d56cb479..d66b710d628f 100644 --- a/tools/arch/x86/lib/memset_64.S +++ b/tools/arch/x86/lib/memset_64.S @@ -3,6 +3,7 @@ #include #include +#include #include #include @@ -28,7 +29,7 @@ * only for the return value that is the same as the source input, * which the compiler could/should do much better anyway. */ -SYM_FUNC_START(__memset) +SYM_TYPED_FUNC_START(__memset) ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS movq %rdi,%r9 diff --git a/tools/include/linux/cfi_types.h b/tools/include/linux/cfi_types.h new file mode 100644 index 000000000000..6b8713675765 --- /dev/null +++ b/tools/include/linux/cfi_types.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Clang Control Flow Integrity (CFI) type definitions. + */ +#ifndef _LINUX_CFI_TYPES_H +#define _LINUX_CFI_TYPES_H + +#ifdef __ASSEMBLY__ +#include + +#ifdef CONFIG_CFI_CLANG +/* + * Use the __kcfi_typeid_ type identifier symbol to + * annotate indirectly called assembly functions. The compiler emits + * these symbols for all address-taken function declarations in C + * code. + */ +#ifndef __CFI_TYPE +#define __CFI_TYPE(name) \ + .4byte __kcfi_typeid_##name +#endif + +#define SYM_TYPED_ENTRY(name, linkage, align...) \ + linkage(name) ASM_NL \ + align ASM_NL \ + __CFI_TYPE(name) ASM_NL \ + name: + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_TYPED_ENTRY(name, linkage, align) + +#else /* CONFIG_CFI_CLANG */ + +#define SYM_TYPED_START(name, linkage, align...) \ + SYM_START(name, linkage, align) + +#endif /* CONFIG_CFI_CLANG */ + +#ifndef SYM_TYPED_FUNC_START +#define SYM_TYPED_FUNC_START(name) \ + SYM_TYPED_START(name, SYM_L_GLOBAL, SYM_A_ALIGN) +#endif + +#endif /* __ASSEMBLY__ */ +#endif /* _LINUX_CFI_TYPES_H */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a4499e5a6f9c..857f6646cc23 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -20,6 +20,7 @@ FILES=( "include/uapi/linux/stat.h" "include/linux/bits.h" "include/vdso/bits.h" + "include/linux/cfi_types.h" "include/linux/const.h" "include/vdso/const.h" "include/vdso/unaligned.h" From ffc59e32c67e599cc473d6427a4aa584399d5b3c Mon Sep 17 00:00:00 2001 From: Leon Romanovsky Date: Thu, 10 Apr 2025 15:32:20 +0300 Subject: [PATCH 079/101] RDMA/bnxt_re: Remove unusable nq variable Remove nq variable from bnxt_re_create_srq() and bnxt_re_destroy_srq() as it generates the following compilation warnings: >> drivers/infiniband/hw/bnxt_re/ib_verbs.c:1777:24: warning: variable 'nq' set but not used [-Wunused-but-set-variable] 1777 | struct bnxt_qplib_nq *nq = NULL; | ^ drivers/infiniband/hw/bnxt_re/ib_verbs.c:1828:24: warning: variable 'nq' set but not used [-Wunused-but-set-variable] 1828 | struct bnxt_qplib_nq *nq = NULL; | ^ 2 warnings generated. Fixes: 6b395d31146a ("RDMA/bnxt_re: Fix budget handling of notification queue") Link: https://patch.msgid.link/r/8a4343e217d7d1c0a5a786b785c4ac57cb72a2a0.1744288299.git.leonro@nvidia.com Reported-by: kernel test robot Closes: https://lore.kernel.org/oe-kbuild-all/202504091055.CzgXnk4C-lkp@intel.com/ Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/bnxt_re/ib_verbs.c | 5 ----- 1 file changed, 5 deletions(-) diff --git a/drivers/infiniband/hw/bnxt_re/ib_verbs.c b/drivers/infiniband/hw/bnxt_re/ib_verbs.c index e14b05cd089a..063801384b2b 100644 --- a/drivers/infiniband/hw/bnxt_re/ib_verbs.c +++ b/drivers/infiniband/hw/bnxt_re/ib_verbs.c @@ -1774,10 +1774,7 @@ int bnxt_re_destroy_srq(struct ib_srq *ib_srq, struct ib_udata *udata) ib_srq); struct bnxt_re_dev *rdev = srq->rdev; struct bnxt_qplib_srq *qplib_srq = &srq->qplib_srq; - struct bnxt_qplib_nq *nq = NULL; - if (qplib_srq->cq) - nq = qplib_srq->cq->nq; if (rdev->chip_ctx->modes.toggle_bits & BNXT_QPLIB_SRQ_TOGGLE_BIT) { free_page((unsigned long)srq->uctx_srq_page); hash_del(&srq->hash_entry); @@ -1825,7 +1822,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, struct ib_udata *udata) { struct bnxt_qplib_dev_attr *dev_attr; - struct bnxt_qplib_nq *nq = NULL; struct bnxt_re_ucontext *uctx; struct bnxt_re_dev *rdev; struct bnxt_re_srq *srq; @@ -1871,7 +1867,6 @@ int bnxt_re_create_srq(struct ib_srq *ib_srq, srq->qplib_srq.eventq_hw_ring_id = rdev->nqr->nq[0].ring_id; srq->qplib_srq.sg_info.pgsize = PAGE_SIZE; srq->qplib_srq.sg_info.pgshft = PAGE_SHIFT; - nq = &rdev->nqr->nq[0]; if (udata) { rc = bnxt_re_init_user_srq(rdev, pd, srq, udata); From 1293dacbbd43ab9848ac4655f6f2ba1dcc5a96ad Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 9 Apr 2025 10:35:31 -0300 Subject: [PATCH 080/101] perf libunwind arm64: Fix missing close parens in an if statement While testing building with libunwind (using LIBUNWIND=1) in various arches I noticed a problem on arm64, on an rpi5 system, a missing close parens in a change related to dso__data_get_fd() usage, fix it. Fixes: 5ac22c35aa8519f1 ("perf dso: Use lock annotations to fix asan deadlock") Signed-off-by: Arnaldo Carvalho de Melo Link: https://lore.kernel.org/r/Z_Z3o8KvB2i5c6ab@x1 Signed-off-by: Namhyung Kim --- tools/perf/util/unwind-libunwind-local.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 9fb2c1343c7f..0b037e7389a0 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -371,7 +371,7 @@ static int read_unwind_spec_debug_frame(struct dso *dso, * has to be pointed by symsrc_filename */ if (ofs == 0) { - if (dso__data_get_fd(dso, machine, &fd) { + if (dso__data_get_fd(dso, machine, &fd)) { ofs = elf_section_offset(fd, ".debug_frame"); dso__data_put_fd(dso); } From 718f9038acc53631cc887676b9c433e6fb9f15dc Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Apr 2025 09:04:00 +0200 Subject: [PATCH 081/101] x86/cpuid: Remove obsolete CPUID(0x2) iteration macro The CPUID(0x2) cache descriptors iterator at : for_each_leaf_0x2_desc() has no more call sites. Remove it. Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20250411070401.1358760-2-darwi@linutronix.de --- arch/x86/include/asm/cpuid/leaf_0x2_api.h | 23 ----------------------- 1 file changed, 23 deletions(-) diff --git a/arch/x86/include/asm/cpuid/leaf_0x2_api.h b/arch/x86/include/asm/cpuid/leaf_0x2_api.h index 46ecb15e92d9..09fa3070b271 100644 --- a/arch/x86/include/asm/cpuid/leaf_0x2_api.h +++ b/arch/x86/include/asm/cpuid/leaf_0x2_api.h @@ -40,29 +40,6 @@ static inline void cpuid_get_leaf_0x2_regs(union leaf_0x2_regs *regs) } } -/** - * for_each_leaf_0x2_desc() - Iterator for CPUID leaf 0x2 descriptors - * @regs: Leaf 0x2 output, as returned by cpuid_get_leaf_0x2_regs() - * @desc: Pointer to the returned descriptor for each iteration - * - * Loop over the 1-byte descriptors in the passed leaf 0x2 output registers - * @regs. Provide each descriptor through @desc. - * - * Note that the first byte is skipped as it is not a descriptor. - * - * Sample usage:: - * - * union leaf_0x2_regs regs; - * u8 *desc; - * - * cpuid_get_leaf_0x2_regs(®s); - * for_each_leaf_0x2_desc(regs, desc) { - * // Handle *desc value - * } - */ -#define for_each_leaf_0x2_desc(regs, desc) \ - for (desc = &(regs).desc[1]; desc < &(regs).desc[16]; desc++) - /** * for_each_leaf_0x2_entry() - Iterator for parsed leaf 0x2 descriptors * @regs: Leaf 0x2 register output, returned by cpuid_get_leaf_0x2_regs() From 62e565273993ee47c82ca2975e65ce4bec3c3697 Mon Sep 17 00:00:00 2001 From: "Ahmed S. Darwish" Date: Fri, 11 Apr 2025 09:04:01 +0200 Subject: [PATCH 082/101] x86/cacheinfo: Standardize header files and CPUID references Reference header files using their canonical form . Standardize on CPUID(0xN), instead of CPUID(N), for all standard leaves. This removes ambiguity and aligns them with their extended counterparts like CPUID(0x8000001d). References: 0dd09e215a39 ("x86/cacheinfo: Apply maintainer-tip coding style fixes") Signed-off-by: Ahmed S. Darwish Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: "H. Peter Anvin" Cc: John Ogness Link: https://lore.kernel.org/r/20250411070401.1358760-3-darwi@linutronix.de --- arch/x86/kernel/cpu/cacheinfo.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index 52727f8c0006..cc7ae2bdcf4a 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -3,9 +3,9 @@ * x86 CPU caches detection and configuration * * Previous changes - * - Venkatesh Pallipadi: Cache identification through CPUID(4) + * - Venkatesh Pallipadi: Cache identification through CPUID(0x4) * - Ashok Raj : Work with CPU hotplug infrastructure - * - Andi Kleen / Andreas Herrmann: CPUID(4) emulation on AMD + * - Andi Kleen / Andreas Herrmann: CPUID(0x4) emulation on AMD */ #include @@ -78,7 +78,7 @@ struct _cpuid4_info { unsigned long size; }; -/* Map CPUID(4) EAX.cache_type to linux/cacheinfo.h types */ +/* Map CPUID(0x4) EAX.cache_type to types */ static const enum cache_type cache_type_map[] = { [CTYPE_NULL] = CACHE_TYPE_NOCACHE, [CTYPE_DATA] = CACHE_TYPE_DATA, @@ -87,7 +87,7 @@ static const enum cache_type cache_type_map[] = { }; /* - * Fallback AMD CPUID(4) emulation + * Fallback AMD CPUID(0x4) emulation * AMD CPUs with TOPOEXT can just use CPUID(0x8000001d) * * @AMD_L2_L3_INVALID_ASSOC: cache info for the respective L2/L3 cache should @@ -361,7 +361,7 @@ static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3, { /* * If llc_id is still unset, then cpuid_level < 4, which implies - * that the only possibility left is SMT. Since CPUID(2) doesn't + * that the only possibility left is SMT. Since CPUID(0x2) doesn't * specify any shared caches and SMT shares all caches, we can * unconditionally set LLC ID to the package ID so that all * threads share it. @@ -376,7 +376,7 @@ static void intel_cacheinfo_done(struct cpuinfo_x86 *c, unsigned int l3, } /* - * Legacy Intel CPUID(2) path if CPUID(4) is not available. + * Legacy Intel CPUID(0x2) path if CPUID(0x4) is not available. */ static void intel_cacheinfo_0x2(struct cpuinfo_x86 *c) { @@ -466,7 +466,7 @@ static bool intel_cacheinfo_0x4(struct cpuinfo_x86 *c) void init_intel_cacheinfo(struct cpuinfo_x86 *c) { - /* Don't use CPUID(2) if CPUID(4) is supported. */ + /* Don't use CPUID(0x2) if CPUID(0x4) is supported. */ if (intel_cacheinfo_0x4(c)) return; @@ -474,7 +474,7 @@ void init_intel_cacheinfo(struct cpuinfo_x86 *c) } /* - * linux/cacheinfo.h shared_cpu_map setup, AMD/Hygon + * shared_cpu_map setup, AMD/Hygon */ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, const struct _cpuid4_info *id4) @@ -533,7 +533,7 @@ static int __cache_amd_cpumap_setup(unsigned int cpu, int index, } /* - * linux/cacheinfo.h shared_cpu_map setup, Intel + fallback AMD/Hygon + * shared_cpu_map setup, Intel + fallback AMD/Hygon */ static void __cache_cpumap_setup(unsigned int cpu, int index, const struct _cpuid4_info *id4) @@ -599,7 +599,7 @@ int init_cache_level(unsigned int cpu) } /* - * The max shared threads number comes from CPUID(4) EAX[25-14] with input + * The max shared threads number comes from CPUID(0x4) EAX[25-14] with input * ECX as cache index. Then right shift apicid by the number's order to get * cache id for this cache node. */ From a64e4d48a0b77e4ada19ac26ca3a08cd492f6362 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 8 Apr 2025 21:46:29 +0100 Subject: [PATCH 083/101] afs: Fix afs_dynroot_readdir() to not use the RCU read lock afs_dynroot_readdir() uses the RCU read lock to walk the cell list whilst emitting cell automount entries - but dir_emit() may write to a userspace buffer, thereby causing a fault to occur and waits to happen. Fix afs_dynroot_readdir() to get a shared lock on net->cells_lock instead. This can be triggered by enabling lockdep, preconfiguring a number of cells, doing "mount -t afs none /afs -o dyn" (or using the kafs-client package with afs.mount systemd unit enabled) and then doing "ls /afs". Fixes: 1d0b929fc070 ("afs: Change dynroot to create contents on demand") Reported-by: syzbot+3b6c5c6a1d0119b687a1@syzkaller.appspotmail.com Reported-by: syzbot+8245611446194a52150d@syzkaller.appspotmail.com Reported-by: syzbot+1aa62e6852a6ad1c7944@syzkaller.appspotmail.com Reported-by: syzbot+54e6c2176ba76c56217e@syzkaller.appspotmail.com Signed-off-by: David Howells Link: https://lore.kernel.org/1638014.1744145189@warthog.procyon.org.uk cc: Marc Dionne cc: linux-afs@lists.infradead.org cc: linux-fsdevel@vger.kernel.org Signed-off-by: Christian Brauner --- fs/afs/dynroot.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/afs/dynroot.c b/fs/afs/dynroot.c index 691e0ae607a1..8c6130789fde 100644 --- a/fs/afs/dynroot.c +++ b/fs/afs/dynroot.c @@ -348,9 +348,9 @@ static int afs_dynroot_readdir(struct file *file, struct dir_context *ctx) } if ((unsigned long long)ctx->pos <= AFS_MAX_DYNROOT_CELL_INO) { - rcu_read_lock(); + down_read(&net->cells_lock); ret = afs_dynroot_readdir_cells(net, ctx); - rcu_read_unlock(); + up_read(&net->cells_lock); } return ret; } From b2b4483b5d05026218127fc8f38c69adf69c235b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 8 Apr 2025 13:00:53 -0700 Subject: [PATCH 084/101] dcache: convert dentry flag macros to enum Commit 9748cb2dc393 ("VFS: repack DENTRY_ flags.") changed the value of DCACHE_MOUNTED, which broke drgn's path_lookup() helper. drgn is forced to hard-code it because it's a macro, and macros aren't preserved in debugging information by default. Enums, on the other hand, are included in debugging information. Convert the DCACHE_* flag macros to an enum so that debugging tools like drgn and bpftrace can make use of them. Link: https://github.com/osandov/drgn/blob/2027d0fea84d74b835e77392f7040c2a333180c6/drgn/helpers/linux/fs.py#L43-L46 Signed-off-by: Omar Sandoval Link: https://lore.kernel.org/177665a082f048cf536b9cd6af467b3be6b6e6ed.1744141838.git.osandov@fb.com Signed-off-by: Christian Brauner --- include/linux/dcache.h | 104 +++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 55 deletions(-) diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 8d1395f945bf..e9f07e37dd6f 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -173,65 +173,59 @@ struct dentry_operations { */ /* d_flags entries */ -#define DCACHE_OP_HASH BIT(0) -#define DCACHE_OP_COMPARE BIT(1) -#define DCACHE_OP_REVALIDATE BIT(2) -#define DCACHE_OP_DELETE BIT(3) -#define DCACHE_OP_PRUNE BIT(4) +enum dentry_flags { + DCACHE_OP_HASH = BIT(0), + DCACHE_OP_COMPARE = BIT(1), + DCACHE_OP_REVALIDATE = BIT(2), + DCACHE_OP_DELETE = BIT(3), + DCACHE_OP_PRUNE = BIT(4), + /* + * This dentry is possibly not currently connected to the dcache tree, + * in which case its parent will either be itself, or will have this + * flag as well. nfsd will not use a dentry with this bit set, but will + * first endeavour to clear the bit either by discovering that it is + * connected, or by performing lookup operations. Any filesystem which + * supports nfsd_operations MUST have a lookup function which, if it + * finds a directory inode with a DCACHE_DISCONNECTED dentry, will + * d_move that dentry into place and return that dentry rather than the + * passed one, typically using d_splice_alias. + */ + DCACHE_DISCONNECTED = BIT(5), + DCACHE_REFERENCED = BIT(6), /* Recently used, don't discard. */ + DCACHE_DONTCACHE = BIT(7), /* Purge from memory on final dput() */ + DCACHE_CANT_MOUNT = BIT(8), + DCACHE_GENOCIDE = BIT(9), + DCACHE_SHRINK_LIST = BIT(10), + DCACHE_OP_WEAK_REVALIDATE = BIT(11), + /* + * this dentry has been "silly renamed" and has to be deleted on the + * last dput() + */ + DCACHE_NFSFS_RENAMED = BIT(12), + DCACHE_FSNOTIFY_PARENT_WATCHED = BIT(13), /* Parent inode is watched by some fsnotify listener */ + DCACHE_DENTRY_KILLED = BIT(14), + DCACHE_MOUNTED = BIT(15), /* is a mountpoint */ + DCACHE_NEED_AUTOMOUNT = BIT(16), /* handle automount on this dir */ + DCACHE_MANAGE_TRANSIT = BIT(17), /* manage transit from this dirent */ + DCACHE_LRU_LIST = BIT(18), + DCACHE_ENTRY_TYPE = (7 << 19), /* bits 19..21 are for storing type: */ + DCACHE_MISS_TYPE = (0 << 19), /* Negative dentry */ + DCACHE_WHITEOUT_TYPE = (1 << 19), /* Whiteout dentry (stop pathwalk) */ + DCACHE_DIRECTORY_TYPE = (2 << 19), /* Normal directory */ + DCACHE_AUTODIR_TYPE = (3 << 19), /* Lookupless directory (presumed automount) */ + DCACHE_REGULAR_TYPE = (4 << 19), /* Regular file type */ + DCACHE_SPECIAL_TYPE = (5 << 19), /* Other file type */ + DCACHE_SYMLINK_TYPE = (6 << 19), /* Symlink */ + DCACHE_NOKEY_NAME = BIT(22), /* Encrypted name encoded without key */ + DCACHE_OP_REAL = BIT(23), + DCACHE_PAR_LOOKUP = BIT(24), /* being looked up (with parent locked shared) */ + DCACHE_DENTRY_CURSOR = BIT(25), + DCACHE_NORCU = BIT(26), /* No RCU delay for freeing */ +}; -#define DCACHE_DISCONNECTED BIT(5) - /* This dentry is possibly not currently connected to the dcache tree, in - * which case its parent will either be itself, or will have this flag as - * well. nfsd will not use a dentry with this bit set, but will first - * endeavour to clear the bit either by discovering that it is connected, - * or by performing lookup operations. Any filesystem which supports - * nfsd_operations MUST have a lookup function which, if it finds a - * directory inode with a DCACHE_DISCONNECTED dentry, will d_move that - * dentry into place and return that dentry rather than the passed one, - * typically using d_splice_alias. */ - -#define DCACHE_REFERENCED BIT(6) /* Recently used, don't discard. */ - -#define DCACHE_DONTCACHE BIT(7) /* Purge from memory on final dput() */ - -#define DCACHE_CANT_MOUNT BIT(8) -#define DCACHE_GENOCIDE BIT(9) -#define DCACHE_SHRINK_LIST BIT(10) - -#define DCACHE_OP_WEAK_REVALIDATE BIT(11) - -#define DCACHE_NFSFS_RENAMED BIT(12) - /* this dentry has been "silly renamed" and has to be deleted on the last - * dput() */ -#define DCACHE_FSNOTIFY_PARENT_WATCHED BIT(13) - /* Parent inode is watched by some fsnotify listener */ - -#define DCACHE_DENTRY_KILLED BIT(14) - -#define DCACHE_MOUNTED BIT(15) /* is a mountpoint */ -#define DCACHE_NEED_AUTOMOUNT BIT(16) /* handle automount on this dir */ -#define DCACHE_MANAGE_TRANSIT BIT(17) /* manage transit from this dirent */ #define DCACHE_MANAGED_DENTRY \ (DCACHE_MOUNTED|DCACHE_NEED_AUTOMOUNT|DCACHE_MANAGE_TRANSIT) -#define DCACHE_LRU_LIST BIT(18) - -#define DCACHE_ENTRY_TYPE (7 << 19) /* bits 19..21 are for storing type: */ -#define DCACHE_MISS_TYPE (0 << 19) /* Negative dentry */ -#define DCACHE_WHITEOUT_TYPE (1 << 19) /* Whiteout dentry (stop pathwalk) */ -#define DCACHE_DIRECTORY_TYPE (2 << 19) /* Normal directory */ -#define DCACHE_AUTODIR_TYPE (3 << 19) /* Lookupless directory (presumed automount) */ -#define DCACHE_REGULAR_TYPE (4 << 19) /* Regular file type */ -#define DCACHE_SPECIAL_TYPE (5 << 19) /* Other file type */ -#define DCACHE_SYMLINK_TYPE (6 << 19) /* Symlink */ - -#define DCACHE_NOKEY_NAME BIT(22) /* Encrypted name encoded without key */ -#define DCACHE_OP_REAL BIT(23) - -#define DCACHE_PAR_LOOKUP BIT(24) /* being looked up (with parent locked shared) */ -#define DCACHE_DENTRY_CURSOR BIT(25) -#define DCACHE_NORCU BIT(26) /* No RCU delay for freeing */ - extern seqlock_t rename_lock; /* From d43dbf7322a356733b3e3a997cad51dce174d83c Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 9 Apr 2025 10:00:23 +0200 Subject: [PATCH 085/101] mount: ensure we don't pointlessly walk the mount tree This logic got broken recently. Add it back. Fixes: 474f7825d533 ("fs: add copy_mount_setattr() helper") Link: https://lore.kernel.org/20250409-sektflaschen-gecko-27c021fbd222@brauner Tested-by: Mikhail Gavrilov Signed-off-by: Christian Brauner --- fs/namespace.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/fs/namespace.c b/fs/namespace.c index 14935a0500a2..1eb76d6312de 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5189,8 +5189,8 @@ static void finish_mount_kattr(struct mount_kattr *kattr) mnt_idmap_put(kattr->mnt_idmap); } -static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, - struct mount_kattr *kattr) +static int wants_mount_setattr(struct mount_attr __user *uattr, size_t usize, + struct mount_kattr *kattr) { int ret; struct mount_attr attr; @@ -5213,9 +5213,13 @@ static int copy_mount_setattr(struct mount_attr __user *uattr, size_t usize, if (attr.attr_set == 0 && attr.attr_clr == 0 && attr.propagation == 0) - return 0; + return 0; /* Tell caller to not bother. */ - return build_mount_kattr(&attr, usize, kattr); + ret = build_mount_kattr(&attr, usize, kattr); + if (ret < 0) + return ret; + + return 1; } SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, @@ -5247,8 +5251,8 @@ SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path, if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; - err = copy_mount_setattr(uattr, usize, &kattr); - if (err) + err = wants_mount_setattr(uattr, usize, &kattr); + if (err <= 0) return err; err = user_path_at(dfd, path, kattr.lookup_flags, &target); @@ -5282,15 +5286,17 @@ SYSCALL_DEFINE5(open_tree_attr, int, dfd, const char __user *, filename, if (flags & AT_RECURSIVE) kattr.kflags |= MOUNT_KATTR_RECURSE; - ret = copy_mount_setattr(uattr, usize, &kattr); - if (ret) + ret = wants_mount_setattr(uattr, usize, &kattr); + if (ret < 0) return ret; - ret = do_mount_setattr(&file->f_path, &kattr); - if (ret) - return ret; + if (ret) { + ret = do_mount_setattr(&file->f_path, &kattr); + if (ret) + return ret; - finish_mount_kattr(&kattr); + finish_mount_kattr(&kattr); + } } fd = get_unused_fd_flags(flags & O_CLOEXEC); From 40cb48eba3b4b79e110c1a35d33a48cac54507a2 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 9 Apr 2025 10:00:15 -0700 Subject: [PATCH 086/101] netfs: Only create /proc/fs/netfs with CONFIG_PROC_FS When testing a special config: CONFIG_NETFS_SUPPORTS=y CONFIG_PROC_FS=n The system crashes with something like: [ 3.766197] ------------[ cut here ]------------ [ 3.766484] kernel BUG at mm/mempool.c:560! [ 3.766789] Oops: invalid opcode: 0000 [#1] SMP NOPTI [ 3.767123] CPU: 0 UID: 0 PID: 1 Comm: swapper/0 Tainted: G W [ 3.767777] Tainted: [W]=WARN [ 3.767968] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), [ 3.768523] RIP: 0010:mempool_alloc_slab.cold+0x17/0x19 [ 3.768847] Code: 50 fe ff 58 5b 5d 41 5c 41 5d 41 5e 41 5f e9 93 95 13 00 [ 3.769977] RSP: 0018:ffffc90000013998 EFLAGS: 00010286 [ 3.770315] RAX: 000000000000002f RBX: ffff888100ba8640 RCX: 0000000000000000 [ 3.770749] RDX: 0000000000000000 RSI: 0000000000000003 RDI: 00000000ffffffff [ 3.771217] RBP: 0000000000092880 R08: 0000000000000000 R09: ffffc90000013828 [ 3.771664] R10: 0000000000000001 R11: 00000000ffffffea R12: 0000000000092cc0 [ 3.772117] R13: 0000000000000400 R14: ffff8881004b1620 R15: ffffea0004ef7e40 [ 3.772554] FS: 0000000000000000(0000) GS:ffff8881b5f3c000(0000) knlGS:0000000000000000 [ 3.773061] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 3.773443] CR2: ffffffff830901b4 CR3: 0000000004296001 CR4: 0000000000770ef0 [ 3.773884] PKRU: 55555554 [ 3.774058] Call Trace: [ 3.774232] [ 3.774371] mempool_alloc_noprof+0x6a/0x190 [ 3.774649] ? _printk+0x57/0x80 [ 3.774862] netfs_alloc_request+0x85/0x2ce [ 3.775147] netfs_readahead+0x28/0x170 [ 3.775395] read_pages+0x6c/0x350 [ 3.775623] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.775928] page_cache_ra_unbounded+0x1bd/0x2a0 [ 3.776247] filemap_get_pages+0x139/0x970 [ 3.776510] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.776820] filemap_read+0xf9/0x580 [ 3.777054] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.777368] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.777674] ? find_held_lock+0x32/0x90 [ 3.777929] ? netfs_start_io_read+0x19/0x70 [ 3.778221] ? netfs_start_io_read+0x19/0x70 [ 3.778489] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.778800] ? lock_acquired+0x1e6/0x450 [ 3.779054] ? srso_alias_return_thunk+0x5/0xfbef5 [ 3.779379] netfs_buffered_read_iter+0x57/0x80 [ 3.779670] __kernel_read+0x158/0x2c0 [ 3.779927] bprm_execve+0x300/0x7a0 [ 3.780185] kernel_execve+0x10c/0x140 [ 3.780423] ? __pfx_kernel_init+0x10/0x10 [ 3.780690] kernel_init+0xd5/0x150 [ 3.780910] ret_from_fork+0x2d/0x50 [ 3.781156] ? __pfx_kernel_init+0x10/0x10 [ 3.781414] ret_from_fork_asm+0x1a/0x30 [ 3.781677] [ 3.781823] Modules linked in: [ 3.782065] ---[ end trace 0000000000000000 ]--- This is caused by the following error path in netfs_init(): if (!proc_mkdir("fs/netfs", NULL)) goto error_proc; Fix this by adding ifdef in netfs_main(), so that /proc/fs/netfs is only created with CONFIG_PROC_FS. Signed-off-by: Song Liu Link: https://lore.kernel.org/20250409170015.2651829-1-song@kernel.org Acked-by: David Howells Signed-off-by: Christian Brauner --- fs/netfs/main.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 4e3e62040831..70ecc8f5f210 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -127,11 +127,13 @@ static int __init netfs_init(void) if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0) goto error_subreqpool; +#ifdef CONFIG_PROC_FS if (!proc_mkdir("fs/netfs", NULL)) goto error_proc; if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL, &netfs_requests_seq_ops)) goto error_procfile; +#endif #ifdef CONFIG_FSCACHE_STATS if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL, netfs_stats_show)) @@ -144,9 +146,11 @@ static int __init netfs_init(void) return 0; error_fscache: +#ifdef CONFIG_PROC_FS error_procfile: remove_proc_subtree("fs/netfs", NULL); error_proc: +#endif mempool_exit(&netfs_subrequest_pool); error_subreqpool: kmem_cache_destroy(netfs_subrequest_slab); From b463d7fd118b984884da7493ae999a62c9892aa3 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Wed, 9 Apr 2025 15:05:34 -0700 Subject: [PATCH 087/101] fs: Fix filename init after recent refactoring getname_flags() should save __user pointer "filename" in filename->uptr. However, this logic is broken by a recent refactoring. Fix it by passing __user pointer filename to helper initname(). Fixes: 611851010c74 ("fs: dedup handling of struct filename init and refcounts bumps") Cc: Mateusz Guzik Cc: Christian Brauner Signed-off-by: Song Liu Link: https://lore.kernel.org/20250409220534.3635801-1-song@kernel.org Signed-off-by: Christian Brauner --- fs/namei.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/fs/namei.c b/fs/namei.c index 360a86ca1f02..8510ff53f12e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -125,9 +125,9 @@ #define EMBEDDED_NAME_MAX (PATH_MAX - offsetof(struct filename, iname)) -static inline void initname(struct filename *name) +static inline void initname(struct filename *name, const char __user *uptr) { - name->uptr = NULL; + name->uptr = uptr; name->aname = NULL; atomic_set(&name->refcnt, 1); } @@ -210,7 +210,7 @@ getname_flags(const char __user *filename, int flags) return ERR_PTR(-ENAMETOOLONG); } } - initname(result); + initname(result, filename); audit_getname(result); return result; } @@ -268,7 +268,7 @@ struct filename *getname_kernel(const char * filename) return ERR_PTR(-ENAMETOOLONG); } memcpy((char *)result->name, filename, len); - initname(result); + initname(result, NULL); audit_getname(result); return result; } From 8e3c15ee0d292c413c66fe10201d1b035a0bea72 Mon Sep 17 00:00:00 2001 From: Gou Hao Date: Thu, 10 Apr 2025 15:12:36 +0800 Subject: [PATCH 088/101] iomap: skip unnecessary ifs_block_is_uptodate check In iomap_adjust_read_range, i is either the first !uptodate block, or it is past last for the second loop looking for trailing uptodate blocks. Assuming there's no overflow (there's no combination of huge folios and tiny blksize) then yeah, there is no point in retesting that the same block pointed to by i is uptodate since we hold the folio lock so nobody else could have set it uptodate. Signed-off-by: Gou Hao Link: https://lore.kernel.org/20250410071236.16017-1-gouhao@uniontech.com Reviewed-by: "Darrick J. Wong" Reviewed-by: Christoph Hellwig Suggested-by: Christoph Hellwig Signed-off-by: Christian Brauner --- fs/iomap/buffered-io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 31553372b33a..5b08bd417b28 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -259,7 +259,7 @@ static void iomap_adjust_read_range(struct inode *inode, struct folio *folio, } /* truncate len if we find any trailing uptodate block(s) */ - for ( ; i <= last; i++) { + while (++i <= last) { if (ifs_block_is_uptodate(ifs, i)) { plen -= (last - i + 1) * block_size; last = i - 1; From 47a742fd977a7a8c39fea890712e9bfdf76f98f1 Mon Sep 17 00:00:00 2001 From: Jan Stancek Date: Thu, 10 Apr 2025 17:05:42 +0200 Subject: [PATCH 089/101] fs: use namespace_{lock,unlock} in dissolve_on_fput() In commit b73ec10a4587 ("fs: add fastpath for dissolve_on_fput()"), the namespace_{lock,unlock} has been replaced with scoped_guard using the namespace_sem. This however now also skips processing of 'unmounted' list in namespace_unlock(), and mount is not (immediately) cleaned up. For example, this causes LTP move_mount02 fail: ... move_mount02.c:80: TPASS: invalid-from-fd: move_mount() failed as expected: EBADF (9) move_mount02.c:80: TPASS: invalid-from-path: move_mount() failed as expected: ENOENT (2) move_mount02.c:80: TPASS: invalid-to-fd: move_mount() failed as expected: EBADF (9) move_mount02.c:80: TPASS: invalid-to-path: move_mount() failed as expected: ENOENT (2) move_mount02.c:80: TPASS: invalid-flags: move_mount() failed as expected: EINVAL (22) tst_test.c:1833: TINFO: === Testing on ext3 === tst_test.c:1170: TINFO: Formatting /dev/loop0 with ext3 opts='' extra opts='' mke2fs 1.47.2 (1-Jan-2025) /dev/loop0 is apparently in use by the system; will not make a filesystem here! tst_test.c:1170: TBROK: mkfs.ext3 failed with exit code 1 The test makes number of move_mount() calls but these are all designed to fail with specific errno. Even after test, 'losetup -d' can't detach loop device. Define a new guard for dissolve_on_fput, that will use namespace_{lock,unlock}. Fixes: b73ec10a4587 ("fs: add fastpath for dissolve_on_fput()") Signed-off-by: Jan Stancek Link: https://lore.kernel.org/cad2f042b886bf0ced3d8e3aff120ec5e0125d61.1744297468.git.jstancek@redhat.com Signed-off-by: Christian Brauner --- fs/namespace.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/fs/namespace.c b/fs/namespace.c index 1eb76d6312de..d9ca80dcc544 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1830,6 +1830,8 @@ static inline void namespace_lock(void) down_write(&namespace_sem); } +DEFINE_GUARD(namespace_lock, struct rw_semaphore *, namespace_lock(), namespace_unlock()) + enum umount_tree_flags { UMOUNT_SYNC = 1, UMOUNT_PROPAGATE = 2, @@ -2383,7 +2385,7 @@ void dissolve_on_fput(struct vfsmount *mnt) return; } - scoped_guard(rwsem_write, &namespace_sem) { + scoped_guard(namespace_lock, &namespace_sem) { ns = m->mnt_ns; if (!must_dissolve(ns)) return; From 2b70702917337a8d6d07f03eed961e0119091647 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 9 Apr 2025 18:02:52 -0700 Subject: [PATCH 090/101] perf tools: Remove evsel__handle_error_quirks() The evsel__handle_error_quirks() is to fixup invalid event attributes on some architecture based on the error code. Currently it's only used for AMD to disable precise_ip not to use IBS which has more restrictions. But the commit c33aea446bf555ab changed call evsel__precise_ip_fallback for any errors so there's no difference with the above function. To make matter worse, it caused a problem with branch stack on Zen3. The IBS doesn't support branch stack so it should use a regular core PMU event. The default event is set precise_max and it starts with 3. And evsel__precise_ip_fallback() tries with it and reduces the level one by one. At last it tries with 0 but it also failed on Zen3 since the branch stack is not supported for the cycles event. At this point, evsel__precise_ip_fallback() restores the original precise_ip value (3) in the hope that it can succeed with other modifier (like exclude_kernel). Then evsel__handle_error_quirks() see it has precise_ip != 0 and make it retry with 0. This created an infinite loop. Before: $ perf record -b -vv |& grep removing removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD removing precise_ip on AMD ... After: $ perf record -b true Error: Failure to open event 'cycles:P' on PMU 'cpu' which will be removed. Invalid event (cycles:P) in per-thread mode, enable system wide with '-a'. Error: Failure to open any events for recording. Fixes: c33aea446bf555ab ("perf tools: Fix precise_ip fallback logic") Tested-by: Chun-Tse Shao Cc: Ravi Bangoria Link: https://lore.kernel.org/r/20250410010252.402221-1-namhyung@kernel.org Signed-off-by: Namhyung Kim --- tools/perf/util/evsel.c | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1974395492d7..3c030da2e477 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2566,25 +2566,6 @@ check: return false; } -static bool evsel__handle_error_quirks(struct evsel *evsel, int error) -{ - /* - * AMD core PMU tries to forward events with precise_ip to IBS PMU - * implicitly. But IBS PMU has more restrictions so it can fail with - * supported event attributes. Let's forward it back to the core PMU - * by clearing precise_ip only if it's from precise_max (:P). - */ - if ((error == -EINVAL || error == -ENOENT) && x86__is_amd_cpu() && - evsel->core.attr.precise_ip && evsel->precise_max) { - evsel->core.attr.precise_ip = 0; - pr_debug2_peo("removing precise_ip on AMD\n"); - display_attr(&evsel->core.attr); - return true; - } - - return false; -} - static int evsel__open_cpu(struct evsel *evsel, struct perf_cpu_map *cpus, struct perf_thread_map *threads, int start_cpu_map_idx, int end_cpu_map_idx) @@ -2730,9 +2711,6 @@ try_fallback: if (evsel__precise_ip_fallback(evsel)) goto retry_open; - if (evsel__handle_error_quirks(evsel, err)) - goto retry_open; - out_close: if (err) threads->err_thread = thread; From 3846389c03a8518884f09056611619bd1461ffc7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Apr 2025 10:41:39 +0200 Subject: [PATCH 091/101] x86/platform/amd: Move the header to Collect AMD specific platform header files in . Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mario Limonciello Link: https://lore.kernel.org/r/20250413084144.3746608-2-mingo@kernel.org --- arch/x86/events/amd/ibs.c | 2 +- arch/x86/include/asm/{amd-ibs.h => amd/ibs.h} | 0 tools/perf/check-headers.sh | 2 +- tools/perf/util/amd-sample-raw.c | 2 +- 4 files changed, 3 insertions(+), 3 deletions(-) rename arch/x86/include/asm/{amd-ibs.h => amd/ibs.h} (100%) diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 0252b7ea8bca..172619932fe3 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -26,7 +26,7 @@ static u32 ibs_caps; #include #include -#include +#include /* attr.config2 */ #define IBS_SW_FILTER_MASK 1 diff --git a/arch/x86/include/asm/amd-ibs.h b/arch/x86/include/asm/amd/ibs.h similarity index 100% rename from arch/x86/include/asm/amd-ibs.h rename to arch/x86/include/asm/amd/ibs.h diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index a4499e5a6f9c..493a13830419 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -185,7 +185,7 @@ done # diff with extra ignore lines check arch/x86/lib/memcpy_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memcpy_\(erms\|orig\))" -I"^#include "' check arch/x86/lib/memset_64.S '-I "^EXPORT_SYMBOL" -I "^#include " -I"^SYM_FUNC_START\(_LOCAL\)*(memset_\(erms\|orig\))"' -check arch/x86/include/asm/amd-ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' +check arch/x86/include/asm/amd/ibs.h '-I "^#include [<\"]\(asm/\)*msr-index.h"' check arch/arm64/include/asm/cputype.h '-I "^#include [<\"]\(asm/\)*sysreg.h"' check include/linux/unaligned.h '-I "^#include " -I "^#include " -I "^#pragma GCC diagnostic"' check include/uapi/asm-generic/mman.h '-I "^#include <\(uapi/\)*asm-generic/mman-common\(-tools\)*.h>"' diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 9d0ce88e90e4..456ce64ad822 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -9,7 +9,7 @@ #include #include -#include "../../arch/x86/include/asm/amd-ibs.h" +#include "../../arch/x86/include/asm/amd/ibs.h" #include "debug.h" #include "session.h" From 861c6b1185fbb2e3fa158aa3aca75d2f767db2a8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Apr 2025 10:41:40 +0200 Subject: [PATCH 092/101] x86/platform/amd: Add standard header guards to Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mario Limonciello Link: https://lore.kernel.org/r/20250413084144.3746608-3-mingo@kernel.org --- arch/x86/include/asm/amd/ibs.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/arch/x86/include/asm/amd/ibs.h b/arch/x86/include/asm/amd/ibs.h index 77f3a589a99a..3ee5903982c2 100644 --- a/arch/x86/include/asm/amd/ibs.h +++ b/arch/x86/include/asm/amd/ibs.h @@ -1,4 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_X86_AMD_IBS_H +#define _ASM_X86_AMD_IBS_H + /* * From PPR Vol 1 for AMD Family 19h Model 01h B1 * 55898 Rev 0.35 - Feb 5, 2021 @@ -151,3 +154,5 @@ struct perf_ibs_data { }; u64 regs[MSR_AMD64_IBS_REG_COUNT_MAX]; }; + +#endif /* _ASM_X86_AMD_IBS_H */ From bcbb65559532891148d990527e9df6b8fc98e98d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 14 Apr 2025 09:32:04 +0200 Subject: [PATCH 093/101] x86/platform/amd: Move the header to Collect AMD specific platform header files in . Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mario Limonciello Link: https://lore.kernel.org/r/20250413084144.3746608-4-mingo@kernel.org --- arch/x86/include/asm/{amd_nb.h => amd/nb.h} | 0 arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/amd_nb.c | 2 +- arch/x86/kernel/aperture_64.c | 2 +- arch/x86/kernel/cpu/amd_cache_disable.c | 2 +- arch/x86/kernel/cpu/cacheinfo.c | 2 +- arch/x86/kernel/cpu/mce/inject.c | 2 +- arch/x86/mm/amdtopology.c | 2 +- arch/x86/mm/numa.c | 2 +- arch/x86/pci/amd_bus.c | 2 +- drivers/char/agp/amd64-agp.c | 2 +- drivers/edac/amd64_edac.c | 2 +- drivers/platform/x86/amd/pmc/mp1_stb.c | 2 +- drivers/pnp/quirks.c | 2 +- drivers/ras/amd/atl/internal.h | 2 +- 15 files changed, 14 insertions(+), 14 deletions(-) rename arch/x86/include/asm/{amd_nb.h => amd/nb.h} (100%) diff --git a/arch/x86/include/asm/amd_nb.h b/arch/x86/include/asm/amd/nb.h similarity index 100% rename from arch/x86/include/asm/amd_nb.h rename to arch/x86/include/asm/amd/nb.h diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index c884deca839b..3485d419c2f5 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include static unsigned long iommu_bus_base; /* GART remapping area (physical) */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 5a8cc48f80be..ffaad175cee2 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include static u32 *flush_words; diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 89c0c8a3fc7e..769321185a08 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/kernel/cpu/amd_cache_disable.c b/arch/x86/kernel/cpu/amd_cache_disable.c index d860ad3f8a5a..8843b9557aea 100644 --- a/arch/x86/kernel/cpu/amd_cache_disable.c +++ b/arch/x86/kernel/cpu/amd_cache_disable.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c index cc7ae2bdcf4a..f866d94352fb 100644 --- a/arch/x86/kernel/cpu/cacheinfo.c +++ b/arch/x86/kernel/cpu/cacheinfo.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/mce/inject.c b/arch/x86/kernel/cpu/mce/inject.c index 06e3cf7229ce..bb060f8326ef 100644 --- a/arch/x86/kernel/cpu/mce/inject.c +++ b/arch/x86/kernel/cpu/mce/inject.c @@ -24,7 +24,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/x86/mm/amdtopology.c b/arch/x86/mm/amdtopology.c index 628833afee37..f980b0eb0105 100644 --- a/arch/x86/mm/amdtopology.c +++ b/arch/x86/mm/amdtopology.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include static unsigned char __initdata nodeids[8]; diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c index 64e5cdb2460a..fed02d1073c7 100644 --- a/arch/x86/mm/numa.c +++ b/arch/x86/mm/numa.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include "numa_internal.h" diff --git a/arch/x86/pci/amd_bus.c b/arch/x86/pci/amd_bus.c index 631512f7ec85..95ae1971a5f1 100644 --- a/arch/x86/pci/amd_bus.c +++ b/arch/x86/pci/amd_bus.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index 8e41731d3642..bf490967241a 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -16,7 +16,7 @@ #include #include /* PAGE_SIZE */ #include -#include +#include #include #include "agp.h" diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 90f0eb7cc5b9..417940f455d3 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -2,7 +2,7 @@ #include #include #include "amd64_edac.h" -#include +#include #include static struct edac_pci_ctl_info *pci_ctl; diff --git a/drivers/platform/x86/amd/pmc/mp1_stb.c b/drivers/platform/x86/amd/pmc/mp1_stb.c index c005f00988f7..3b9b9f30faa3 100644 --- a/drivers/platform/x86/amd/pmc/mp1_stb.c +++ b/drivers/platform/x86/amd/pmc/mp1_stb.c @@ -11,7 +11,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/pnp/quirks.c b/drivers/pnp/quirks.c index 6085a1471de2..6e1d4bfd28ac 100644 --- a/drivers/pnp/quirks.c +++ b/drivers/pnp/quirks.c @@ -290,7 +290,7 @@ static void quirk_system_pci_resources(struct pnp_dev *dev) #ifdef CONFIG_AMD_NB -#include +#include static void quirk_amd_mmconfig_area(struct pnp_dev *dev) { diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index f9be26d25348..c63fee30cc18 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include "reg_fields.h" From d96c78684166bc0e7997d12d845882cb5824eed3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Apr 2025 10:41:42 +0200 Subject: [PATCH 094/101] x86/platform/amd: Move the header to Collect AMD specific platform header files in . Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Cc: Carlos Bilbao Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mario Limonciello Cc: Naveen Krishna Chatradhi Link: https://lore.kernel.org/r/20250413084144.3746608-5-mingo@kernel.org --- MAINTAINERS | 2 +- arch/x86/include/asm/{amd_hsmp.h => amd/hsmp.h} | 0 drivers/platform/x86/amd/hsmp/acpi.c | 2 +- drivers/platform/x86/amd/hsmp/hsmp.c | 2 +- drivers/platform/x86/amd/hsmp/plat.c | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename arch/x86/include/asm/{amd_hsmp.h => amd/hsmp.h} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index 61b9cc04c814..d4ea6d78db51 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1097,7 +1097,7 @@ R: Carlos Bilbao L: platform-driver-x86@vger.kernel.org S: Maintained F: Documentation/arch/x86/amd_hsmp.rst -F: arch/x86/include/asm/amd_hsmp.h +F: arch/x86/include/asm/amd/hsmp.h F: arch/x86/include/uapi/asm/amd_hsmp.h F: drivers/platform/x86/amd/hsmp/ diff --git a/arch/x86/include/asm/amd_hsmp.h b/arch/x86/include/asm/amd/hsmp.h similarity index 100% rename from arch/x86/include/asm/amd_hsmp.h rename to arch/x86/include/asm/amd/hsmp.h diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c index c1eccb3c80c5..3c7acb90c3ab 100644 --- a/drivers/platform/x86/amd/hsmp/acpi.c +++ b/drivers/platform/x86/amd/hsmp/acpi.c @@ -9,7 +9,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include diff --git a/drivers/platform/x86/amd/hsmp/hsmp.c b/drivers/platform/x86/amd/hsmp/hsmp.c index a3ac09a90de4..e262e8a97b45 100644 --- a/drivers/platform/x86/amd/hsmp/hsmp.c +++ b/drivers/platform/x86/amd/hsmp/hsmp.c @@ -7,7 +7,7 @@ * This file provides a device implementation for HSMP interface */ -#include +#include #include #include diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c index b9782a078dbd..0eb73fcb9806 100644 --- a/drivers/platform/x86/amd/hsmp/plat.c +++ b/drivers/platform/x86/amd/hsmp/plat.c @@ -9,7 +9,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include From 5bb144e52c666dc0082a90662a5406133415cacc Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Apr 2025 10:41:43 +0200 Subject: [PATCH 095/101] x86/platform/amd: Clean up the header guards a bit - There's no need for a newline after the SPDX line - But there's a need for one before the closing header guard. Collect AMD specific platform header files in . Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Cc: Carlos Bilbao Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mario Limonciello Cc: Naveen Krishna Chatradhi Link: https://lore.kernel.org/r/20250413084144.3746608-6-mingo@kernel.org --- arch/x86/include/asm/amd/hsmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/include/asm/amd/hsmp.h b/arch/x86/include/asm/amd/hsmp.h index 03c2ce3edaf5..2137f62853ed 100644 --- a/arch/x86/include/asm/amd/hsmp.h +++ b/arch/x86/include/asm/amd/hsmp.h @@ -1,5 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ - #ifndef _ASM_X86_AMD_HSMP_H_ #define _ASM_X86_AMD_HSMP_H_ @@ -13,4 +12,5 @@ static inline int hsmp_send_message(struct hsmp_message *msg) return -ENODEV; } #endif + #endif /*_ASM_X86_AMD_HSMP_H_*/ From 0a35c9280a9105e601cfe23b7c15522a195fa412 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 13 Apr 2025 10:41:44 +0200 Subject: [PATCH 096/101] x86/platform/amd: Move the header to Collect AMD specific platform header files in . Signed-off-by: Ingo Molnar Acked-by: Borislav Petkov (AMD) Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Mario Limonciello Link: https://lore.kernel.org/r/20250413084144.3746608-7-mingo@kernel.org --- MAINTAINERS | 2 +- arch/x86/include/asm/amd/nb.h | 2 +- arch/x86/include/asm/{amd_node.h => amd/node.h} | 0 arch/x86/kernel/amd_node.c | 2 +- arch/x86/pci/fixup.c | 2 +- drivers/edac/amd64_edac.c | 2 +- drivers/hwmon/k10temp.c | 2 +- drivers/platform/x86/amd/hsmp/acpi.c | 2 +- drivers/platform/x86/amd/hsmp/plat.c | 2 +- drivers/platform/x86/amd/pmc/pmc.c | 2 +- drivers/platform/x86/amd/pmf/core.c | 2 +- drivers/ras/amd/atl/internal.h | 2 +- sound/soc/amd/acp/acp-rembrandt.c | 2 +- sound/soc/amd/acp/acp63.c | 2 +- sound/soc/amd/acp/acp70.c | 2 +- sound/soc/sof/amd/acp.c | 2 +- 16 files changed, 15 insertions(+), 15 deletions(-) rename arch/x86/include/asm/{amd_node.h => amd/node.h} (100%) diff --git a/MAINTAINERS b/MAINTAINERS index d4ea6d78db51..14bbb43a19ba 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1142,7 +1142,7 @@ M: Mario Limonciello M: Yazen Ghannam L: linux-kernel@vger.kernel.org S: Supported -F: arch/x86/include/asm/amd_node.h +F: arch/x86/include/asm/amd/node.h F: arch/x86/kernel/amd_node.c AMD PDS CORE DRIVER diff --git a/arch/x86/include/asm/amd/nb.h b/arch/x86/include/asm/amd/nb.h index adfa0854cf2d..ddb5108cf46c 100644 --- a/arch/x86/include/asm/amd/nb.h +++ b/arch/x86/include/asm/amd/nb.h @@ -4,7 +4,7 @@ #include #include -#include +#include struct amd_nb_bus_dev_range { u8 bus; diff --git a/arch/x86/include/asm/amd_node.h b/arch/x86/include/asm/amd/node.h similarity index 100% rename from arch/x86/include/asm/amd_node.h rename to arch/x86/include/asm/amd/node.h diff --git a/arch/x86/kernel/amd_node.c b/arch/x86/kernel/amd_node.c index b670fa85c61b..a40176b62eb5 100644 --- a/arch/x86/kernel/amd_node.c +++ b/arch/x86/kernel/amd_node.c @@ -9,7 +9,7 @@ */ #include -#include +#include /* * AMD Nodes are a physical collection of I/O devices within an SoC. There can be one diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c index efefeb82ab61..36336299596b 100644 --- a/arch/x86/pci/fixup.c +++ b/arch/x86/pci/fixup.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/edac/amd64_edac.c b/drivers/edac/amd64_edac.c index 417940f455d3..25180165ca8e 100644 --- a/drivers/edac/amd64_edac.c +++ b/drivers/edac/amd64_edac.c @@ -3,7 +3,7 @@ #include #include "amd64_edac.h" #include -#include +#include static struct edac_pci_ctl_info *pci_ctl; diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index 3685906cc57c..472bcf6092f6 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor"); diff --git a/drivers/platform/x86/amd/hsmp/acpi.c b/drivers/platform/x86/amd/hsmp/acpi.c index 3c7acb90c3ab..02e22c1f3a1d 100644 --- a/drivers/platform/x86/amd/hsmp/acpi.c +++ b/drivers/platform/x86/amd/hsmp/acpi.c @@ -23,7 +23,7 @@ #include -#include +#include #include "hsmp.h" diff --git a/drivers/platform/x86/amd/hsmp/plat.c b/drivers/platform/x86/amd/hsmp/plat.c index 0eb73fcb9806..c9a1b1ed4224 100644 --- a/drivers/platform/x86/amd/hsmp/plat.c +++ b/drivers/platform/x86/amd/hsmp/plat.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include "hsmp.h" diff --git a/drivers/platform/x86/amd/pmc/pmc.c b/drivers/platform/x86/amd/pmc/pmc.c index d789d6cab794..e5c4e6be37f7 100644 --- a/drivers/platform/x86/amd/pmc/pmc.c +++ b/drivers/platform/x86/amd/pmc/pmc.c @@ -28,7 +28,7 @@ #include #include -#include +#include #include "pmc.h" diff --git a/drivers/platform/x86/amd/pmf/core.c b/drivers/platform/x86/amd/pmf/core.c index a2cb2d5544f5..cecadae3f3bf 100644 --- a/drivers/platform/x86/amd/pmf/core.c +++ b/drivers/platform/x86/amd/pmf/core.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include "pmf.h" /* PMF-SMU communication registers */ diff --git a/drivers/ras/amd/atl/internal.h b/drivers/ras/amd/atl/internal.h index c63fee30cc18..05bbee20eeaf 100644 --- a/drivers/ras/amd/atl/internal.h +++ b/drivers/ras/amd/atl/internal.h @@ -18,7 +18,7 @@ #include #include -#include +#include #include "reg_fields.h" diff --git a/sound/soc/amd/acp/acp-rembrandt.c b/sound/soc/amd/acp/acp-rembrandt.c index 746b6ed72029..c30a7b59fbff 100644 --- a/sound/soc/amd/acp/acp-rembrandt.c +++ b/sound/soc/amd/acp/acp-rembrandt.c @@ -22,7 +22,7 @@ #include #include -#include +#include #include "amd.h" #include "../mach-config.h" diff --git a/sound/soc/amd/acp/acp63.c b/sound/soc/amd/acp/acp63.c index 52d895e624c7..0ddde14da7d5 100644 --- a/sound/soc/amd/acp/acp63.c +++ b/sound/soc/amd/acp/acp63.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include "amd.h" #include "acp-mach.h" diff --git a/sound/soc/amd/acp/acp70.c b/sound/soc/amd/acp/acp70.c index 6d5f5ade075c..7f4a25b46576 100644 --- a/sound/soc/amd/acp/acp70.c +++ b/sound/soc/amd/acp/acp70.c @@ -23,7 +23,7 @@ #include "amd.h" #include "acp-mach.h" -#include +#include #define DRV_NAME "acp_asoc_acp70" diff --git a/sound/soc/sof/amd/acp.c b/sound/soc/sof/amd/acp.c index 7c6d647fa253..7e6f10726ff0 100644 --- a/sound/soc/sof/amd/acp.c +++ b/sound/soc/sof/amd/acp.c @@ -16,7 +16,7 @@ #include #include -#include +#include #include "../ops.h" #include "acp.h" From 9fb6938d55348ee3e47deff9646fae59a15ed1c9 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 14 Apr 2025 11:41:29 +0200 Subject: [PATCH 097/101] x86/cpuid: Align macro linebreaks vertically Align the backspaces vertically again, after recent cleanups. No functional changes. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Ahmed S. Darwish Cc: H. Peter Anvin Cc: Linus Torvalds Link: https://lore.kernel.org/r/20250414094130.6768-1-bp@kernel.org --- arch/x86/include/asm/cpuid/api.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/include/asm/cpuid/api.h b/arch/x86/include/asm/cpuid/api.h index bf75c6267311..bf76a1706d02 100644 --- a/arch/x86/include/asm/cpuid/api.h +++ b/arch/x86/include/asm/cpuid/api.h @@ -36,9 +36,9 @@ static inline void native_cpuid(u32 *eax, u32 *ebx, } #define NATIVE_CPUID_REG(reg) \ -static inline u32 native_cpuid_##reg(u32 op) \ +static inline u32 native_cpuid_##reg(u32 op) \ { \ - u32 eax = op, ebx, ecx = 0, edx; \ + u32 eax = op, ebx, ecx = 0, edx; \ \ native_cpuid(&eax, &ebx, &ecx, &edx); \ \ From dd86a1d013e0c94fedd060514b9e7be2988ef320 Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Mon, 14 Apr 2025 17:09:51 +0200 Subject: [PATCH 098/101] x86/bugs: Remove X86_BUG_MMIO_UNKNOWN Whack this thing because: - the "unknown" handling is done only for this vuln and not for the others - it doesn't do anything besides reporting things differently. It doesn't apply any mitigations - it is simply causing unnecessary complications to the code which don't bring anything besides maintenance overhead to what is already a very nasty spaghetti pile - all the currently unaffected CPUs can also be in "unknown" status so there's no need for special handling here so get rid of it. Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Ingo Molnar Cc: Andrew Cooper Cc: David Kaplan Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Pawan Gupta Link: https://lore.kernel.org/r/20250414150951.5345-1-bp@kernel.org --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/bugs.c | 12 +----------- arch/x86/kernel/cpu/common.c | 5 ----- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 4 files changed, 3 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 6c2c152d8a67..e8f8d43a3825 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -519,7 +519,7 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index 4386aa6c69e1..a91a1cac6183 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -428,7 +428,6 @@ static const char * const mmio_strings[] = { static void __init mmio_select_mitigation(void) { if (!boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA) || - boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN) || cpu_mitigations_off()) { mmio_mitigation = MMIO_MITIGATION_OFF; return; @@ -591,8 +590,6 @@ out: pr_info("TAA: %s\n", taa_strings[taa_mitigation]); if (boot_cpu_has_bug(X86_BUG_MMIO_STALE_DATA)) pr_info("MMIO Stale Data: %s\n", mmio_strings[mmio_mitigation]); - else if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) - pr_info("MMIO Stale Data: Unknown: No mitigations\n"); if (boot_cpu_has_bug(X86_BUG_RFDS)) pr_info("Register File Data Sampling: %s\n", rfds_strings[rfds_mitigation]); } @@ -2819,9 +2816,6 @@ static ssize_t tsx_async_abort_show_state(char *buf) static ssize_t mmio_stale_data_show_state(char *buf) { - if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) - return sysfs_emit(buf, "Unknown: No mitigations\n"); - if (mmio_mitigation == MMIO_MITIGATION_OFF) return sysfs_emit(buf, "%s\n", mmio_strings[mmio_mitigation]); @@ -3006,7 +3000,6 @@ static ssize_t cpu_show_common(struct device *dev, struct device_attribute *attr return srbds_show_state(buf); case X86_BUG_MMIO_STALE_DATA: - case X86_BUG_MMIO_UNKNOWN: return mmio_stale_data_show_state(buf); case X86_BUG_RETBLEED: @@ -3075,10 +3068,7 @@ ssize_t cpu_show_srbds(struct device *dev, struct device_attribute *attr, char * ssize_t cpu_show_mmio_stale_data(struct device *dev, struct device_attribute *attr, char *buf) { - if (boot_cpu_has_bug(X86_BUG_MMIO_UNKNOWN)) - return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_UNKNOWN); - else - return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); + return cpu_show_common(dev, attr, buf, X86_BUG_MMIO_STALE_DATA); } ssize_t cpu_show_retbleed(struct device *dev, struct device_attribute *attr, char *buf) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 12126adbc3a9..4ada55f126ae 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1402,15 +1402,10 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) * Affected CPU list is generally enough to enumerate the vulnerability, * but for virtualization case check for ARCH_CAP MSR bits also, VMM may * not want the guest to enumerate the bug. - * - * Set X86_BUG_MMIO_UNKNOWN for CPUs that are neither in the blacklist, - * nor in the whitelist and also don't enumerate MSR ARCH_CAP MMIO bits. */ if (!arch_cap_mmio_immune(x86_arch_cap_msr)) { if (cpu_matches(cpu_vuln_blacklist, MMIO)) setup_force_cpu_bug(X86_BUG_MMIO_STALE_DATA); - else if (!cpu_matches(cpu_vuln_whitelist, NO_MMIO)) - setup_force_cpu_bug(X86_BUG_MMIO_UNKNOWN); } if (!cpu_has(c, X86_FEATURE_BTC_NO)) { diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 9e3fa7942e7d..e88500d90309 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -508,7 +508,7 @@ #define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* "itlb_multihit" CPU may incur MCE during certain page attribute changes */ #define X86_BUG_SRBDS X86_BUG(24) /* "srbds" CPU may leak RNG bits if not mitigated */ #define X86_BUG_MMIO_STALE_DATA X86_BUG(25) /* "mmio_stale_data" CPU is affected by Processor MMIO Stale Data vulnerabilities */ -#define X86_BUG_MMIO_UNKNOWN X86_BUG(26) /* "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ +/* unused, was #define X86_BUG_MMIO_UNKNOWN X86_BUG(26) * "mmio_unknown" CPU is too old and its MMIO Stale Data status is unknown */ #define X86_BUG_RETBLEED X86_BUG(27) /* "retbleed" CPU is affected by RETBleed */ #define X86_BUG_EIBRS_PBRSB X86_BUG(28) /* "eibrs_pbrsb" EIBRS is vulnerable to Post Barrier RSB Predictions */ #define X86_BUG_SMT_RSB X86_BUG(29) /* "smt_rsb" CPU is vulnerable to Cross-Thread Return Address Predictions */ From 282cc5b6762310a73255c6dd1d79de1609042eeb Mon Sep 17 00:00:00 2001 From: "Borislav Petkov (AMD)" Date: Tue, 15 Apr 2025 10:54:08 -0700 Subject: [PATCH 099/101] x86/cpufeatures: Clean up formatting It is a special file with special formatting so remove one whitespace damage and format newer defines like the rest. No functional changes. [ Xin: Do the same to tools/arch/x86/include/asm/cpufeatures.h. ] Signed-off-by: Borislav Petkov (AMD) Signed-off-by: Xin Li (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250415175410.2944032-2-xin@zytor.com --- arch/x86/include/asm/cpufeatures.h | 20 ++++++++++---------- tools/arch/x86/include/asm/cpufeatures.h | 18 ++++++++++-------- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index e8f8d43a3825..60b4a4c00491 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -477,10 +477,10 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ -#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ -#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ -#define X86_FEATURE_PREFER_YMM (21*32 + 8) /* Avoid ZMM registers due to downclocking */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -527,10 +527,10 @@ #define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ -#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ -#define X86_BUG_SPECTRE_V2_USER X86_BUG(1*32 + 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ +#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e88500d90309..2e219be047d4 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -467,9 +467,10 @@ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ -#define X86_FEATURE_AMD_FAST_CPPC (21*32 + 5) /* Fast CPPC */ -#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32 + 6) /* Heterogeneous Core Topology */ -#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32 + 7) /* Workload Classification */ +#define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ +#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ +#define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ /* * BUG word(s) @@ -516,9 +517,10 @@ #define X86_BUG_TDX_PW_MCE X86_BUG(31) /* "tdx_pw_mce" CPU may incur #MC if non-TD software does partial write to TDX private memory */ /* BUG word 2 */ -#define X86_BUG_SRSO X86_BUG(1*32 + 0) /* "srso" AMD SRSO bug */ -#define X86_BUG_DIV0 X86_BUG(1*32 + 1) /* "div0" AMD DIV0 speculation bug */ -#define X86_BUG_RFDS X86_BUG(1*32 + 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ -#define X86_BUG_BHI X86_BUG(1*32 + 3) /* "bhi" CPU is affected by Branch History Injection */ -#define X86_BUG_IBPB_NO_RET X86_BUG(1*32 + 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SRSO X86_BUG( 1*32+ 0) /* "srso" AMD SRSO bug */ +#define X86_BUG_DIV0 X86_BUG( 1*32+ 1) /* "div0" AMD DIV0 speculation bug */ +#define X86_BUG_RFDS X86_BUG( 1*32+ 2) /* "rfds" CPU is vulnerable to Register File Data Sampling */ +#define X86_BUG_BHI X86_BUG( 1*32+ 3) /* "bhi" CPU is affected by Branch History Injection */ +#define X86_BUG_IBPB_NO_RET X86_BUG( 1*32+ 4) /* "ibpb_no_ret" IBPB omits return target predictions */ +#define X86_BUG_SPECTRE_V2_USER X86_BUG( 1*32+ 5) /* "spectre_v2_user" CPU is affected by Spectre variant 2 attack between user processes */ #endif /* _ASM_X86_CPUFEATURES_H */ From 13327fada7ff0ae858e28b9515cd7d6ccb5fccc7 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Tue, 15 Apr 2025 10:54:09 -0700 Subject: [PATCH 100/101] x86/cpufeatures: Shorten X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT Shorten X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT to X86_FEATURE_CLEAR_BHB_VMEXIT to make the last column aligned consistently in the whole file. There's no need to explain in the name what the mitigation does. No functional changes. Suggested-by: Borislav Petkov (AMD) Signed-off-by: Xin Li (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250415175410.2944032-3-xin@zytor.com --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/include/asm/nospec-branch.h | 2 +- arch/x86/kernel/cpu/bugs.c | 6 +++--- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 60b4a4c00491..bd27a1d70177 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -476,7 +476,7 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ #define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32+ 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 8a5cc8e70439..707ee5298d2a 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -327,7 +327,7 @@ .endm .macro CLEAR_BRANCH_HISTORY_VMEXIT - ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT + ALTERNATIVE "", "call clear_bhb_loop", X86_FEATURE_CLEAR_BHB_VMEXIT .endm #else #define CLEAR_BRANCH_HISTORY diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index a91a1cac6183..3228f5dd6bd7 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c @@ -1701,13 +1701,13 @@ static void __init bhi_select_mitigation(void) if (bhi_mitigation == BHI_MITIGATION_VMEXIT_ONLY) { pr_info("Spectre BHI mitigation: SW BHB clearing on VM exit only\n"); - setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_VMEXIT); return; } pr_info("Spectre BHI mitigation: SW BHB clearing on syscall and VM exit\n"); setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP); - setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT); + setup_force_cpu_cap(X86_FEATURE_CLEAR_BHB_VMEXIT); } static void __init spectre_v2_select_mitigation(void) @@ -2891,7 +2891,7 @@ static const char *spectre_bhi_state(void) !boot_cpu_has(X86_FEATURE_RETPOLINE_LFENCE) && rrsba_disabled) return "; BHI: Retpoline"; - else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT)) + else if (boot_cpu_has(X86_FEATURE_CLEAR_BHB_VMEXIT)) return "; BHI: Vulnerable, KVM: SW loop"; return "; BHI: Vulnerable"; diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 2e219be047d4..e10c3f4ab328 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -466,7 +466,7 @@ #define X86_FEATURE_CLEAR_BHB_LOOP (21*32+ 1) /* Clear branch history at syscall entry using SW loop */ #define X86_FEATURE_BHI_CTRL (21*32+ 2) /* BHI_DIS_S HW control available */ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ -#define X86_FEATURE_CLEAR_BHB_LOOP_ON_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ +#define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ #define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ #define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32+ 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ From 3aba0b40cacdfba4a604dd09315fa6cdbeb0ed90 Mon Sep 17 00:00:00 2001 From: "Xin Li (Intel)" Date: Tue, 15 Apr 2025 10:54:10 -0700 Subject: [PATCH 101/101] x86/cpufeatures: Shorten X86_FEATURE_AMD_HETEROGENEOUS_CORES Shorten X86_FEATURE_AMD_HETEROGENEOUS_CORES to X86_FEATURE_AMD_HTR_CORES to make the last column aligned consistently in the whole file. No functional changes. Suggested-by: Borislav Petkov (AMD) Signed-off-by: Xin Li (Intel) Signed-off-by: Borislav Petkov (AMD) Link: https://lore.kernel.org/20250415175410.2944032-4-xin@zytor.com --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/acpi/cppc.c | 2 +- arch/x86/kernel/cpu/scattered.c | 2 +- arch/x86/kernel/cpu/topology_amd.c | 2 +- tools/arch/x86/include/asm/cpufeatures.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index bd27a1d70177..bc81b9d1aeca 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -478,7 +478,7 @@ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ #define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ -#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */ diff --git a/arch/x86/kernel/acpi/cppc.c b/arch/x86/kernel/acpi/cppc.c index 77bfb846490c..62ca714aae77 100644 --- a/arch/x86/kernel/acpi/cppc.c +++ b/arch/x86/kernel/acpi/cppc.c @@ -272,7 +272,7 @@ int amd_get_boost_ratio_numerator(unsigned int cpu, u64 *numerator) } /* detect if running on heterogeneous design */ - if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) { + if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES)) { switch (core_type) { case TOPO_CPU_TYPE_UNKNOWN: pr_warn("Undefined core type found for cpu %d\n", cpu); diff --git a/arch/x86/kernel/cpu/scattered.c b/arch/x86/kernel/cpu/scattered.c index 16f3ca30626a..c75c57b32b74 100644 --- a/arch/x86/kernel/cpu/scattered.c +++ b/arch/x86/kernel/cpu/scattered.c @@ -53,7 +53,7 @@ static const struct cpuid_bit cpuid_bits[] = { { X86_FEATURE_PERFMON_V2, CPUID_EAX, 0, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_V2, CPUID_EAX, 1, 0x80000022, 0 }, { X86_FEATURE_AMD_LBR_PMC_FREEZE, CPUID_EAX, 2, 0x80000022, 0 }, - { X86_FEATURE_AMD_HETEROGENEOUS_CORES, CPUID_EAX, 30, 0x80000026, 0 }, + { X86_FEATURE_AMD_HTR_CORES, CPUID_EAX, 30, 0x80000026, 0 }, { 0, 0, 0, 0, 0 } }; diff --git a/arch/x86/kernel/cpu/topology_amd.c b/arch/x86/kernel/cpu/topology_amd.c index 03b3c9c3a45e..eb799e2405f9 100644 --- a/arch/x86/kernel/cpu/topology_amd.c +++ b/arch/x86/kernel/cpu/topology_amd.c @@ -182,7 +182,7 @@ static void parse_topology_amd(struct topo_scan *tscan) if (cpu_feature_enabled(X86_FEATURE_TOPOEXT)) has_topoext = cpu_parse_topology_ext(tscan); - if (cpu_feature_enabled(X86_FEATURE_AMD_HETEROGENEOUS_CORES)) + if (cpu_feature_enabled(X86_FEATURE_AMD_HTR_CORES)) tscan->c->topo.cpu_type = cpuid_ebx(0x80000026); if (!has_topoext && !parse_8000_0008(tscan)) diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index e10c3f4ab328..fdbc92aca812 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -468,7 +468,7 @@ #define X86_FEATURE_CLEAR_BHB_HW (21*32+ 3) /* BHI_DIS_S HW control enabled */ #define X86_FEATURE_CLEAR_BHB_VMEXIT (21*32+ 4) /* Clear branch history at vmexit using SW loop */ #define X86_FEATURE_AMD_FAST_CPPC (21*32+ 5) /* Fast CPPC */ -#define X86_FEATURE_AMD_HETEROGENEOUS_CORES (21*32+ 6) /* Heterogeneous Core Topology */ +#define X86_FEATURE_AMD_HTR_CORES (21*32+ 6) /* Heterogeneous Core Topology */ #define X86_FEATURE_AMD_WORKLOAD_CLASS (21*32+ 7) /* Workload Classification */ #define X86_FEATURE_PREFER_YMM (21*32+ 8) /* Avoid ZMM registers due to downclocking */