From aadddd68bde444cd737c376816a29b642da0610e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:30 +0200 Subject: [PATCH 01/52] perf c2c: Introduce c2c_decode_stats function Introducing c2c_decode_stats function, which decodes data_src data into new struct c2c_stats. Signed-off-by: Jiri Olsa Original-patch-by: Dick Fowles Original-patch-by: Don Zickus Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-3-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 98 ++++++++++++++++++++++++++++++++++++ tools/perf/util/mem-events.h | 36 +++++++++++++ 2 files changed, 134 insertions(+) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index bbc368e7d1e4..502fcee91973 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -9,6 +9,7 @@ #include "mem-events.h" #include "debug.h" #include "symbol.h" +#include "sort.h" unsigned int perf_mem_events__loads_ldlat = 30; @@ -268,3 +269,100 @@ int perf_script__meminfo_scnprintf(char *out, size_t sz, struct mem_info *mem_in return i; } + +int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) +{ + union perf_mem_data_src *data_src = &mi->data_src; + u64 daddr = mi->daddr.addr; + u64 op = data_src->mem_op; + u64 lvl = data_src->mem_lvl; + u64 snoop = data_src->mem_snoop; + u64 lock = data_src->mem_lock; + int err = 0; + +#define P(a, b) PERF_MEM_##a##_##b + + stats->nr_entries++; + + if (lock & P(LOCK, LOCKED)) stats->locks++; + + if (op & P(OP, LOAD)) { + /* load */ + stats->load++; + + if (!daddr) { + stats->ld_noadrs++; + return -1; + } + + if (lvl & P(LVL, HIT)) { + if (lvl & P(LVL, UNC)) stats->ld_uncache++; + if (lvl & P(LVL, IO)) stats->ld_io++; + if (lvl & P(LVL, LFB)) stats->ld_fbhit++; + if (lvl & P(LVL, L1 )) stats->ld_l1hit++; + if (lvl & P(LVL, L2 )) stats->ld_l2hit++; + if (lvl & P(LVL, L3 )) { + if 
(snoop & P(SNOOP, HITM)) + stats->lcl_hitm++; + else + stats->ld_llchit++; + } + + if (lvl & P(LVL, LOC_RAM)) { + stats->lcl_dram++; + if (snoop & P(SNOOP, HIT)) + stats->ld_shared++; + else + stats->ld_excl++; + } + + if ((lvl & P(LVL, REM_RAM1)) || + (lvl & P(LVL, REM_RAM2))) { + stats->rmt_dram++; + if (snoop & P(SNOOP, HIT)) + stats->ld_shared++; + else + stats->ld_excl++; + } + } + + if ((lvl & P(LVL, REM_CCE1)) || + (lvl & P(LVL, REM_CCE2))) { + if (snoop & P(SNOOP, HIT)) + stats->rmt_hit++; + else if (snoop & P(SNOOP, HITM)) + stats->rmt_hitm++; + } + + if ((lvl & P(LVL, MISS))) + stats->ld_miss++; + + } else if (op & P(OP, STORE)) { + /* store */ + stats->store++; + + if (!daddr) { + stats->st_noadrs++; + return -1; + } + + if (lvl & P(LVL, HIT)) { + if (lvl & P(LVL, UNC)) stats->st_uncache++; + if (lvl & P(LVL, L1 )) stats->st_l1hit++; + } + if (lvl & P(LVL, MISS)) + if (lvl & P(LVL, L1)) stats->st_l1miss++; + } else { + /* unparsable data_src? */ + stats->noparse++; + return -1; + } + + if (!mi->daddr.map || !mi->iaddr.map) { + stats->nomap++; + return -1; + } + +#undef P + return err; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index 7f69bf9d789d..e111a2a2b18f 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -2,6 +2,10 @@ #define __PERF_MEM_EVENTS_H #include +#include +#include +#include +#include "stat.h" struct perf_mem_event { bool record; @@ -33,4 +37,36 @@ int perf_mem__lck_scnprintf(char *out, size_t sz, struct mem_info *mem_info); int perf_script__meminfo_scnprintf(char *bf, size_t size, struct mem_info *mem_info); +struct c2c_stats { + u32 nr_entries; + + u32 locks; /* count of 'lock' transactions */ + u32 store; /* count of all stores in trace */ + u32 st_uncache; /* stores to uncacheable address */ + u32 st_noadrs; /* cacheable store with no address */ + u32 st_l1hit; /* count of stores that hit L1D */ + u32 st_l1miss; /* count of stores that miss L1D */ + u32 load; /* count of all 
loads in trace */ + u32 ld_excl; /* exclusive loads, rmt/lcl DRAM - snp none/miss */ + u32 ld_shared; /* shared loads, rmt/lcl DRAM - snp hit */ + u32 ld_uncache; /* loads to uncacheable address */ + u32 ld_io; /* loads to io address */ + u32 ld_miss; /* loads miss */ + u32 ld_noadrs; /* cacheable load with no address */ + u32 ld_fbhit; /* count of loads hitting Fill Buffer */ + u32 ld_l1hit; /* count of loads that hit L1D */ + u32 ld_l2hit; /* count of loads that hit L2D */ + u32 ld_llchit; /* count of loads that hit LLC */ + u32 lcl_hitm; /* count of loads with local HITM */ + u32 rmt_hitm; /* count of loads with remote HITM */ + u32 rmt_hit; /* count of loads with remote hit clean; */ + u32 lcl_dram; /* count of loads miss to local DRAM */ + u32 rmt_dram; /* count of loads miss to remote DRAM */ + u32 nomap; /* count of load/stores with no phys adrs */ + u32 noparse; /* count of unparsable data sources */ +}; + +struct hist_entry; +int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi); + #endif /* __PERF_MEM_EVENTS_H */ From 0a9a24cc0e9b47e83e9f603cd459ead37507e712 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:31 +0200 Subject: [PATCH 02/52] perf c2c: Introduce c2c_add_stats function Introducing c2c_add_stats function helper to cumulate c2c_stats. 
Original-patch-by: Dick Fowles Original-patch-by: Don Zickus Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-4-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mem-events.c | 30 ++++++++++++++++++++++++++++++ tools/perf/util/mem-events.h | 1 + 2 files changed, 31 insertions(+) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index 502fcee91973..e50773286ef6 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -366,3 +366,33 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) #undef P return err; } + +void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add) +{ + stats->nr_entries += add->nr_entries; + + stats->locks += add->locks; + stats->store += add->store; + stats->st_uncache += add->st_uncache; + stats->st_noadrs += add->st_noadrs; + stats->st_l1hit += add->st_l1hit; + stats->st_l1miss += add->st_l1miss; + stats->load += add->load; + stats->ld_excl += add->ld_excl; + stats->ld_shared += add->ld_shared; + stats->ld_uncache += add->ld_uncache; + stats->ld_io += add->ld_io; + stats->ld_miss += add->ld_miss; + stats->ld_noadrs += add->ld_noadrs; + stats->ld_fbhit += add->ld_fbhit; + stats->ld_l1hit += add->ld_l1hit; + stats->ld_l2hit += add->ld_l2hit; + stats->ld_llchit += add->ld_llchit; + stats->lcl_hitm += add->lcl_hitm; + stats->rmt_hitm += add->rmt_hitm; + stats->rmt_hit += add->rmt_hit; + stats->lcl_dram += add->lcl_dram; + stats->rmt_dram += add->rmt_dram; + stats->nomap += add->nomap; + stats->noparse += add->noparse; +} diff --git a/tools/perf/util/mem-events.h b/tools/perf/util/mem-events.h index e111a2a2b18f..faf80403b519 100644 --- a/tools/perf/util/mem-events.h +++ b/tools/perf/util/mem-events.h @@ -68,5 +68,6 @@ struct c2c_stats { struct hist_entry; int c2c_decode_stats(struct c2c_stats *stats, struct mem_info 
*mi); +void c2c_add_stats(struct c2c_stats *stats, struct c2c_stats *add); #endif /* __PERF_MEM_EVENTS_H */ From 7aef3bf3daa182f31d197e1a4f789797cc3cc561 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:38 +0200 Subject: [PATCH 03/52] perf c2c: Add c2c command Adding c2c command base wirings. Its implementation is going to be added gradually in following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-11-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 1 + tools/perf/builtin-c2c.c | 23 +++++++++++++++++++++++ tools/perf/builtin.h | 1 + tools/perf/perf.c | 1 + 4 files changed, 26 insertions(+) create mode 100644 tools/perf/builtin-c2c.c diff --git a/tools/perf/Build b/tools/perf/Build index a43fae7f439a..b12d5d1666e3 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -21,6 +21,7 @@ perf-y += builtin-inject.o perf-y += builtin-mem.o perf-y += builtin-data.o perf-y += builtin-version.o +perf-y += builtin-c2c.o perf-$(CONFIG_AUDIT) += builtin-trace.o perf-$(CONFIG_LIBELF) += builtin-probe.o diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c new file mode 100644 index 000000000000..8252ed0ba5d0 --- /dev/null +++ b/tools/perf/builtin-c2c.c @@ -0,0 +1,23 @@ +#include +#include +#include "util.h" +#include "debug.h" +#include "builtin.h" +#include + +static const char * const c2c_usage[] = { + "perf c2c", + NULL +}; + +int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) +{ + const struct option c2c_options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_END() + }; + + argc = parse_options(argc, argv, c2c_options, c2c_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + return 0; +} diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index 41c24010ab43..0bcf68e98ccc 100644 --- a/tools/perf/builtin.h +++ 
b/tools/perf/builtin.h @@ -18,6 +18,7 @@ int cmd_bench(int argc, const char **argv, const char *prefix); int cmd_buildid_cache(int argc, const char **argv, const char *prefix); int cmd_buildid_list(int argc, const char **argv, const char *prefix); int cmd_config(int argc, const char **argv, const char *prefix); +int cmd_c2c(int argc, const char **argv, const char *prefix); int cmd_diff(int argc, const char **argv, const char *prefix); int cmd_evlist(int argc, const char **argv, const char *prefix); int cmd_help(int argc, const char **argv, const char *prefix); diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 64c06961bfe4..aa23b3347d6b 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -43,6 +43,7 @@ static struct cmd_struct commands[] = { { "buildid-cache", cmd_buildid_cache, 0 }, { "buildid-list", cmd_buildid_list, 0 }, { "config", cmd_config, 0 }, + { "c2c", cmd_c2c, 0 }, { "diff", cmd_diff, 0 }, { "evlist", cmd_evlist, 0 }, { "help", cmd_help, 0 }, From 39bcd4a4e4cbd0ce41a6be848aec335646de1919 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:39 +0200 Subject: [PATCH 04/52] perf c2c: Add record subcommand Adding c2c record subcommand. It sets up options related to HITM cacheline analysis and calls the standard perf record command. $ sudo perf c2c record -v -- -a calling: record -W -d --sample-cpu -e cpu/mem-loads,ldlat=30/P -e cpu/mem-stores/P -a ... It produces perf.data, which is to be reported by perf c2c report, which comes in the following patches. Details are described in the man page, which is added in one of the following patches. Committer notes: Testing it: # perf c2c record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 5.050 MB perf.data (412 samples) ] # ls -la perf.data -rw-------.
1 root root 5301752 Oct 4 13:32 perf.data # perf evlist cpu/mem-loads,ldlat=30/P cpu/mem-stores/P # perf evlist -v cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1 # # perf report --stdio # Total Lost Samples: 14 # Samples: 216 of event 'cpu/mem-loads,ldlat=30/P' # Event count (approx.): 15207 # Overhead Symbol Shared Object # ........ ..................................... ............................ 10.32% [k] update_blocked_averages [kernel.vmlinux] 3.43% [.] 0x00000000001a2122 qemu-system-x86_64 (deleted) 2.52% [k] enqueue_entity [kernel.vmlinux] 1.88% [.] g_main_context_query libglib-2.0.so.0.4800.2 1.86% [k] __schedule [kernel.vmlinux] # Samples: 196 of event 'cpu/mem-stores/P' # Event count (approx.): 14771346 # Overhead Symbol Shared Object # ........ ................................... ............................ 13.91% [k] intel_idle [kernel.vmlinux] 3.02% [.] 0x00000000022f06ea chrome 2.94% [.] 0x00000000001a1b4c qemu-system-x86_64 (deleted) 2.94% [.] 0x000000000019d8e4 qemu-system-x86_64 (deleted) 2.38% [.] 
0x00000000001a1c52 qemu-system-x86_64 (deleted) Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-12-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 114 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 114 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 8252ed0ba5d0..58924c67f818 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -4,12 +4,116 @@ #include "debug.h" #include "builtin.h" #include +#include "mem-events.h" static const char * const c2c_usage[] = { "perf c2c", NULL }; +static int parse_record_events(const struct option *opt __maybe_unused, + const char *str, int unset __maybe_unused) +{ + bool *event_set = (bool *) opt->value; + + *event_set = true; + return perf_mem_events__parse(str); +} + + +static const char * const __usage_record[] = { + "perf c2c record [] []", + "perf c2c record [] -- []", + NULL +}; + +static const char * const *record_mem_usage = __usage_record; + +static int perf_c2c__record(int argc, const char **argv) +{ + int rec_argc, i = 0, j; + const char **rec_argv; + int ret; + bool all_user = false, all_kernel = false; + bool event_set = false; + struct option options[] = { + OPT_CALLBACK('e', "event", &event_set, "event", + "event selector. 
Use 'perf mem record -e list' to list available events", + parse_record_events), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show counter open errors, etc)"), + OPT_BOOLEAN('u', "all-user", &all_user, "collect only user level data"), + OPT_BOOLEAN('k', "all-kernel", &all_kernel, "collect only kernel level data"), + OPT_UINTEGER('l', "ldlat", &perf_mem_events__loads_ldlat, "setup mem-loads latency"), + OPT_END() + }; + + if (perf_mem_events__init()) { + pr_err("failed: memory events not supported\n"); + return -1; + } + + argc = parse_options(argc, argv, options, record_mem_usage, + PARSE_OPT_KEEP_UNKNOWN); + + rec_argc = argc + 10; /* max number of arguments */ + rec_argv = calloc(rec_argc + 1, sizeof(char *)); + if (!rec_argv) + return -1; + + rec_argv[i++] = "record"; + + if (!event_set) { + perf_mem_events[PERF_MEM_EVENTS__LOAD].record = true; + perf_mem_events[PERF_MEM_EVENTS__STORE].record = true; + } + + if (perf_mem_events[PERF_MEM_EVENTS__LOAD].record) + rec_argv[i++] = "-W"; + + rec_argv[i++] = "-d"; + rec_argv[i++] = "--sample-cpu"; + + for (j = 0; j < PERF_MEM_EVENTS__MAX; j++) { + if (!perf_mem_events[j].record) + continue; + + if (!perf_mem_events[j].supported) { + pr_err("failed: event '%s' not supported\n", + perf_mem_events[j].name); + return -1; + } + + rec_argv[i++] = "-e"; + rec_argv[i++] = perf_mem_events__name(j); + }; + + if (all_user) + rec_argv[i++] = "--all-user"; + + if (all_kernel) + rec_argv[i++] = "--all-kernel"; + + for (j = 0; j < argc; j++, i++) + rec_argv[i] = argv[j]; + + if (verbose > 0) { + pr_debug("calling: "); + + j = 0; + + while (rec_argv[j]) { + pr_debug("%s ", rec_argv[j]); + j++; + } + pr_debug("\n"); + } + + ret = cmd_record(i, rec_argv, NULL); + free(rec_argv); + return ret; +} + int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) { const struct option c2c_options[] = { @@ -19,5 +123,15 @@ int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) argc = 
parse_options(argc, argv, c2c_options, c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); + + if (!argc) + usage_with_options(c2c_usage, c2c_options); + + if (!strncmp(argv[0], "rec", 3)) { + return perf_c2c__record(argc, argv); + } else { + usage_with_options(c2c_usage, c2c_options); + } + return 0; } From 903a6f15b9968a048760d79224cec4ce4b06d781 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:40 +0200 Subject: [PATCH 05/52] perf c2c: Add report subcommand Adding c2c report subcommand. It reads the perf.data and displays shared data analysis. This patch adds report basic wirings. It gets fully implemented in following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-13-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 66 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 65 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 58924c67f818..3fac3a294bdd 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -5,12 +5,74 @@ #include "builtin.h" #include #include "mem-events.h" +#include "session.h" +#include "hist.h" +#include "tool.h" +#include "data.h" + +struct perf_c2c { + struct perf_tool tool; +}; + +static struct perf_c2c c2c; static const char * const c2c_usage[] = { - "perf c2c", + "perf c2c {record|report}", NULL }; +static const char * const __usage_report[] = { + "perf c2c report", + NULL +}; + +static const char * const *report_c2c_usage = __usage_report; + +static int perf_c2c__report(int argc, const char **argv) +{ + struct perf_session *session; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; + const struct option c2c_options[] = { + OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, + "file", "vmlinux pathname"), + OPT_INCR('v', "verbose", &verbose, + "be more 
verbose (show counter open errors, etc)"), + OPT_STRING('i', "input", &input_name, "file", + "the input file to process"), + OPT_END() + }; + int err = 0; + + argc = parse_options(argc, argv, c2c_options, report_c2c_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (!argc) + usage_with_options(report_c2c_usage, c2c_options); + + file.path = input_name; + + session = perf_session__new(&file, 0, &c2c.tool); + if (session == NULL) { + pr_debug("No memory for session\n"); + goto out; + } + + if (symbol__init(&session->header.env) < 0) + goto out_session; + + /* No pipe support at the moment. */ + if (perf_data_file__is_pipe(session->file)) { + pr_debug("No pipe support at the moment.\n"); + goto out_session; + } + +out_session: + perf_session__delete(session); +out: + return err; +} + static int parse_record_events(const struct option *opt __maybe_unused, const char *str, int unset __maybe_unused) { @@ -129,6 +191,8 @@ int cmd_c2c(int argc, const char **argv, const char *prefix __maybe_unused) if (!strncmp(argv[0], "rec", 3)) { return perf_c2c__record(argc, argv); + } else if (!strncmp(argv[0], "rep", 3)) { + return perf_c2c__report(argc, argv); } else { usage_with_options(c2c_usage, c2c_options); } From c75540e3160fb5867a3d88ea195cb300e66f22c3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:41 +0200 Subject: [PATCH 06/52] perf c2c report: Add dimension support Adding bare bones of dimension support for c2c report. Main interface functions are: c2c_hists__init c2c_hists__reinit which re/initialize 'struct c2c_hists' object with sort/display entries string, in a similar way that setup_sorting function does. We overload the dimension to provide multi line header support for sort/display entries. Also we overload base 'struct perf_hpp_fmt' object with 'struct c2c_fmt' to define c2c specific functions to deal with multi line headers and spans. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-14-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 239 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 238 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3fac3a294bdd..63c0e2d8d2d8 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -10,8 +10,14 @@ #include "tool.h" #include "data.h" +struct c2c_hists { + struct hists hists; + struct perf_hpp_list list; +}; + struct perf_c2c { - struct perf_tool tool; + struct perf_tool tool; + struct c2c_hists hists; }; static struct perf_c2c c2c; @@ -28,6 +34,231 @@ static const char * const __usage_report[] = { static const char * const *report_c2c_usage = __usage_report; +#define C2C_HEADER_MAX 2 + +struct c2c_header { + struct { + const char *text; + int span; + } line[C2C_HEADER_MAX]; +}; + +struct c2c_dimension { + struct c2c_header header; + const char *name; + int width; + + int64_t (*cmp)(struct perf_hpp_fmt *fmt, + struct hist_entry *, struct hist_entry *); + int (*entry)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he); + int (*color)(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he); +}; + +struct c2c_fmt { + struct perf_hpp_fmt fmt; + struct c2c_dimension *dim; +}; + +static int c2c_width(struct perf_hpp_fmt *fmt, + struct perf_hpp *hpp __maybe_unused, + struct hists *hists __maybe_unused) +{ + struct c2c_fmt *c2c_fmt; + + c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); + return c2c_fmt->dim->width; +} + +static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hists *hists __maybe_unused, int line, int *span) +{ + struct c2c_fmt *c2c_fmt; + struct c2c_dimension *dim; + int len = c2c_width(fmt, hpp, hists); + const char *text; + + c2c_fmt = 
container_of(fmt, struct c2c_fmt, fmt); + dim = c2c_fmt->dim; + + text = dim->header.line[line].text; + if (text == NULL) + text = ""; + + if (*span) { + (*span)--; + return 0; + } else { + *span = dim->header.line[line].span; + } + + return scnprintf(hpp->buf, hpp->size, "%*s", len, text); +} + +static struct c2c_dimension *dimensions[] = { + NULL, +}; + +static void fmt_free(struct perf_hpp_fmt *fmt) +{ + struct c2c_fmt *c2c_fmt; + + c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); + free(c2c_fmt); +} + +static bool fmt_equal(struct perf_hpp_fmt *a, struct perf_hpp_fmt *b) +{ + struct c2c_fmt *c2c_a = container_of(a, struct c2c_fmt, fmt); + struct c2c_fmt *c2c_b = container_of(b, struct c2c_fmt, fmt); + + return c2c_a->dim == c2c_b->dim; +} + +static struct c2c_dimension *get_dimension(const char *name) +{ + unsigned int i; + + for (i = 0; dimensions[i]; i++) { + struct c2c_dimension *dim = dimensions[i]; + + if (!strcmp(dim->name, name)) + return dim; + }; + + return NULL; +} + +static struct c2c_fmt *get_format(const char *name) +{ + struct c2c_dimension *dim = get_dimension(name); + struct c2c_fmt *c2c_fmt; + struct perf_hpp_fmt *fmt; + + if (!dim) + return NULL; + + c2c_fmt = zalloc(sizeof(*c2c_fmt)); + if (!c2c_fmt) + return NULL; + + c2c_fmt->dim = dim; + + fmt = &c2c_fmt->fmt; + INIT_LIST_HEAD(&fmt->list); + INIT_LIST_HEAD(&fmt->sort_list); + + fmt->cmp = dim->cmp; + fmt->sort = dim->cmp; + fmt->entry = dim->entry; + fmt->header = c2c_header; + fmt->width = c2c_width; + fmt->collapse = dim->cmp; + fmt->equal = fmt_equal; + fmt->free = fmt_free; + + return c2c_fmt; +} + +static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name) +{ + struct c2c_fmt *c2c_fmt = get_format(name); + + if (!c2c_fmt) + return -1; + + perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt); + return 0; +} + +static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) +{ + struct c2c_fmt *c2c_fmt = get_format(name); + + if (!c2c_fmt) + return -1; 
+ + perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt); + return 0; +} + +#define PARSE_LIST(_list, _fn) \ + do { \ + char *tmp, *tok; \ + ret = 0; \ + \ + if (!_list) \ + break; \ + \ + for (tok = strtok_r((char *)_list, ", ", &tmp); \ + tok; tok = strtok_r(NULL, ", ", &tmp)) { \ + ret = _fn(hpp_list, tok); \ + if (ret == -EINVAL) { \ + error("Invalid --fields key: `%s'", tok); \ + break; \ + } else if (ret == -ESRCH) { \ + error("Unknown --fields key: `%s'", tok); \ + break; \ + } \ + } \ + } while (0) + +static int hpp_list__parse(struct perf_hpp_list *hpp_list, + const char *output_, + const char *sort_) +{ + char *output = output_ ? strdup(output_) : NULL; + char *sort = sort_ ? strdup(sort_) : NULL; + int ret; + + PARSE_LIST(output, c2c_hists__init_output); + PARSE_LIST(sort, c2c_hists__init_sort); + + /* copy sort keys to output fields */ + perf_hpp__setup_output_field(hpp_list); + + /* + * We dont need other sorting keys other than those + * we already specified. It also really slows down + * the processing a lot with big number of output + * fields, so switching this off for c2c. + */ + +#if 0 + /* and then copy output fields to sort keys */ + perf_hpp__append_sort_keys(&hists->list); +#endif + + free(output); + free(sort); + return ret; +} + +static int c2c_hists__init(struct c2c_hists *hists, + const char *sort) +{ + __hists__init(&hists->hists, &hists->list); + + /* + * Initialize only with sort fields, we need to resort + * later anyway, and that's where we add output fields + * as well. 
+ */ + perf_hpp_list__init(&hists->list); + + return hpp_list__parse(&hists->list, NULL, sort); +} + +__maybe_unused +static int c2c_hists__reinit(struct c2c_hists *c2c_hists, + const char *output, + const char *sort) +{ + perf_hpp__reset_output_field(&c2c_hists->list); + return hpp_list__parse(&c2c_hists->list, output, sort); +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -52,6 +283,12 @@ static int perf_c2c__report(int argc, const char **argv) file.path = input_name; + err = c2c_hists__init(&c2c.hists, "dcacheline"); + if (err) { + pr_debug("Failed to initialize hists\n"); + goto out; + } + session = perf_session__new(&file, 0, &c2c.tool); if (session == NULL) { pr_debug("No memory for session\n"); From 8d3f938dc757549dd75d1b4df4f7faf92dc5dfc3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:42 +0200 Subject: [PATCH 07/52] perf c2c report: Add sort_entry dimension support Allow reusing 'struct sort_entry' objects within c2c dimension support. When a 'struct sort_entry' object meets the needs of c2c report, we will use it directly in the following patches.
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-15-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 96 ++++++++++++++++++++++++++++++---------- 1 file changed, 72 insertions(+), 24 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 63c0e2d8d2d8..6b58b537bc9d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -9,6 +9,7 @@ #include "hist.h" #include "tool.h" #include "data.h" +#include "sort.h" struct c2c_hists { struct hists hists; @@ -47,6 +48,7 @@ struct c2c_dimension { struct c2c_header header; const char *name; int width; + struct sort_entry *se; int64_t (*cmp)(struct perf_hpp_fmt *fmt, struct hist_entry *, struct hist_entry *); @@ -66,34 +68,47 @@ static int c2c_width(struct perf_hpp_fmt *fmt, struct hists *hists __maybe_unused) { struct c2c_fmt *c2c_fmt; - - c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); - return c2c_fmt->dim->width; -} - -static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, - struct hists *hists __maybe_unused, int line, int *span) -{ - struct c2c_fmt *c2c_fmt; struct c2c_dimension *dim; - int len = c2c_width(fmt, hpp, hists); - const char *text; c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); dim = c2c_fmt->dim; - text = dim->header.line[line].text; + return dim->se ? 
hists__col_len(hists, dim->se->se_width_idx) : + c2c_fmt->dim->width; +} + +static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hists *hists, int line, int *span) +{ + struct perf_hpp_list *hpp_list = hists->hpp_list; + struct c2c_fmt *c2c_fmt; + struct c2c_dimension *dim; + const char *text = NULL; + int width = c2c_width(fmt, hpp, hists); + + c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); + dim = c2c_fmt->dim; + + if (dim->se) { + text = dim->header.line[line].text; + /* Use the last line from sort_entry if not defined. */ + if (!text && (line == hpp_list->nr_header_lines - 1)) + text = dim->se->se_header; + } else { + text = dim->header.line[line].text; + + if (*span) { + (*span)--; + return 0; + } else { + *span = dim->header.line[line].span; + } + } + if (text == NULL) text = ""; - if (*span) { - (*span)--; - return 0; - } else { - *span = dim->header.line[line].span; - } - - return scnprintf(hpp->buf, hpp->size, "%*s", len, text); + return scnprintf(hpp->buf, hpp->size, "%*s", width, text); } static struct c2c_dimension *dimensions[] = { @@ -130,6 +145,39 @@ static struct c2c_dimension *get_dimension(const char *name) return NULL; } +static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); + struct c2c_dimension *dim = c2c_fmt->dim; + size_t len = fmt->user_len; + + if (!len) + len = hists__col_len(he->hists, dim->se->se_width_idx); + + return dim->se->se_snprintf(he, hpp->buf, hpp->size, len); +} + +static int64_t c2c_se_cmp(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); + struct c2c_dimension *dim = c2c_fmt->dim; + + return dim->se->se_cmp(a, b); +} + +static int64_t c2c_se_collapse(struct perf_hpp_fmt *fmt, + struct hist_entry *a, struct hist_entry *b) +{ + struct c2c_fmt *c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); + 
struct c2c_dimension *dim = c2c_fmt->dim; + int64_t (*collapse_fn)(struct hist_entry *, struct hist_entry *); + + collapse_fn = dim->se->se_collapse ?: dim->se->se_cmp; + return collapse_fn(a, b); +} + static struct c2c_fmt *get_format(const char *name) { struct c2c_dimension *dim = get_dimension(name); @@ -149,12 +197,12 @@ static struct c2c_fmt *get_format(const char *name) INIT_LIST_HEAD(&fmt->list); INIT_LIST_HEAD(&fmt->sort_list); - fmt->cmp = dim->cmp; - fmt->sort = dim->cmp; - fmt->entry = dim->entry; + fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp; + fmt->sort = dim->se ? c2c_se_cmp : dim->cmp; + fmt->entry = dim->se ? c2c_se_entry : dim->entry; fmt->header = c2c_header; fmt->width = c2c_width; - fmt->collapse = dim->cmp; + fmt->collapse = dim->se ? c2c_se_collapse : dim->cmp; fmt->equal = fmt_equal; fmt->free = fmt_free; From 5f2eca833cc244c6872e83fb4a5faaae1c0a87b7 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:43 +0200 Subject: [PATCH 08/52] perf c2c report: Fallback to standard dimensions Fallback to standard dimensions in case we don't find the dimension within c2c ones. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-16-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 6b58b537bc9d..a3481f86e2ae 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -213,8 +213,10 @@ static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name) { struct c2c_fmt *c2c_fmt = get_format(name); - if (!c2c_fmt) - return -1; + if (!c2c_fmt) { + reset_dimensions(); + return output_field_add(hpp_list, name); + } perf_hpp_list__column_register(hpp_list, &c2c_fmt->fmt); return 0; @@ -224,8 +226,10 @@ static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) { struct c2c_fmt *c2c_fmt = get_format(name); - if (!c2c_fmt) - return -1; + if (!c2c_fmt) { + reset_dimensions(); + return sort_dimension__add(hpp_list, name, NULL, 0); + } perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt); return 0; From 78b275437873da5431b7ccc61f7ce3827bb55324 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:44 +0200 Subject: [PATCH 09/52] perf c2c report: Add sample processing Adding basic sample processing specific hist_entry allocation callbacks (via hists__add_entry_ops). Overloading 'struct hist_entry' object with new 'struct c2c_hist_entry'. The new hist entry object will carry specific stats and nested hists objects. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-17-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 108 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 107 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index a3481f86e2ae..29fb9573e292 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -16,6 +16,15 @@ struct c2c_hists { struct perf_hpp_list list; }; +struct c2c_hist_entry { + struct c2c_hists *hists; + /* + * must be at the end, + * because of its callchain dynamic entry + */ + struct hist_entry he; +}; + struct perf_c2c { struct perf_tool tool; struct c2c_hists hists; @@ -23,6 +32,86 @@ struct perf_c2c { static struct perf_c2c c2c; +static void *c2c_he_zalloc(size_t size) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = zalloc(size + sizeof(*c2c_he)); + if (!c2c_he) + return NULL; + + return &c2c_he->he; +} + +static void c2c_he_free(void *he) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (c2c_he->hists) { + hists__delete_entries(&c2c_he->hists->hists); + free(c2c_he->hists); + } + + free(c2c_he); +} + +static struct hist_entry_ops c2c_entry_ops = { + .new = c2c_he_zalloc, + .free = c2c_he_free, +}; + +static int process_sample_event(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct perf_evsel *evsel __maybe_unused, + struct machine *machine) +{ + struct hists *hists = &c2c.hists.hists; + struct hist_entry *he; + struct addr_location al; + struct mem_info *mi; + int ret; + + if (machine__resolve(machine, &al, sample) < 0) { + pr_debug("problem processing %d event, skipping it.\n", + event->header.type); + return -1; + } + + mi = sample__resolve_mem(sample, &al); + if (mi == NULL) + return -ENOMEM; + + he = 
hists__add_entry_ops(hists, &c2c_entry_ops, + &al, NULL, NULL, mi, + sample, true); + if (he == NULL) { + free(mi); + return -ENOMEM; + } + + hists__inc_nr_samples(hists, he->filtered); + ret = hist_entry__append_callchain(he, sample); + + addr_location__put(&al); + return ret; +} + +static struct perf_c2c c2c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, +}; + static const char * const c2c_usage[] = { "perf c2c {record|report}", NULL @@ -314,6 +403,7 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; + struct ui_progress prog; struct perf_data_file file = { .mode = PERF_DATA_MODE_READ, }; @@ -330,9 +420,12 @@ static int perf_c2c__report(int argc, const char **argv) argc = parse_options(argc, argv, c2c_options, report_c2c_usage, PARSE_OPT_STOP_AT_NON_OPTION); - if (!argc) + if (argc) usage_with_options(report_c2c_usage, c2c_options); + if (!input_name || !strlen(input_name)) + input_name = "perf.data"; + file.path = input_name; err = c2c_hists__init(&c2c.hists, "dcacheline"); @@ -356,6 +449,19 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; } + err = perf_session__process_events(session); + if (err) { + pr_err("failed to process sample\n"); + goto out_session; + } + + ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); + + hists__collapse_resort(&c2c.hists.hists, NULL); + hists__output_resort(&c2c.hists.hists, &prog); + + ui_progress__finish(); + out_session: perf_session__delete(session); out: From ec06f9b9b23f29c8f25367fc43c85c327229d5ca Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:45 +0200 Subject: [PATCH 
10/52] perf c2c report: Add cacheline hists processing Store cacheline related entries in nested hist object for each cacheline data. Nested entries are sorted by 'offset' within related cacheline. We will allow specific sort keys to be configured for nested cacheline data entries in following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-18-git-send-email-jolsa@kernel.org [ he__get_hists() should return NULL when c2c_hists__init() fails ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 92 +++++++++++++++++++++++++++++++++++++--- 1 file changed, 86 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 29fb9573e292..df413b564361 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -61,6 +61,34 @@ static struct hist_entry_ops c2c_entry_ops = { .free = c2c_he_free, }; +static int c2c_hists__init(struct c2c_hists *hists, + const char *sort); + +static struct hists* +he__get_hists(struct hist_entry *he, + const char *sort) +{ + struct c2c_hist_entry *c2c_he; + struct c2c_hists *hists; + int ret; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + if (c2c_he->hists) + return &c2c_he->hists->hists; + + hists = c2c_he->hists = zalloc(sizeof(*hists)); + if (!hists) + return NULL; + + ret = c2c_hists__init(hists, sort); + if (ret) { + free(hists); + return NULL; + } + + return &hists->hists; +} + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -70,7 +98,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct hists *hists = &c2c.hists.hists; struct hist_entry *he; struct addr_location al; - struct mem_info *mi; + struct mem_info *mi, *mi_dup; int ret; if (machine__resolve(machine, &al, sample) < 0) { @@ -83,19 +111,50 @@ static int 
process_sample_event(struct perf_tool *tool __maybe_unused, if (mi == NULL) return -ENOMEM; + mi_dup = memdup(mi, sizeof(*mi)); + if (!mi_dup) + goto free_mi; + he = hists__add_entry_ops(hists, &c2c_entry_ops, &al, NULL, NULL, mi, sample, true); - if (he == NULL) { - free(mi); - return -ENOMEM; - } + if (he == NULL) + goto free_mi_dup; hists__inc_nr_samples(hists, he->filtered); ret = hist_entry__append_callchain(he, sample); + if (!ret) { + mi = mi_dup; + + mi_dup = memdup(mi, sizeof(*mi)); + if (!mi_dup) + goto free_mi; + + hists = he__get_hists(he, "offset"); + if (!hists) + goto free_mi_dup; + + he = hists__add_entry_ops(hists, &c2c_entry_ops, + &al, NULL, NULL, mi, + sample, true); + if (he == NULL) + goto free_mi_dup; + + hists__inc_nr_samples(hists, he->filtered); + ret = hist_entry__append_callchain(he, sample); + } + +out: addr_location__put(&al); return ret; + +free_mi_dup: + free(mi_dup); +free_mi: + free(mi); + ret = -ENOMEM; + goto out; } static struct perf_c2c c2c = { @@ -400,6 +459,27 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, return hpp_list__parse(&c2c_hists->list, output, sort); } +static int filter_cb(struct hist_entry *he __maybe_unused) +{ + return 0; +} + +static int resort_cl_cb(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + struct c2c_hists *c2c_hists; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_hists = c2c_he->hists; + + if (c2c_hists) { + hists__collapse_resort(&c2c_hists->hists, NULL); + hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb); + } + + return 0; +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -458,7 +538,7 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); - hists__output_resort(&c2c.hists.hists, &prog); + hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb); ui_progress__finish(); From 
b2252ae67b687d2b6f1a159a94e8387f6dbf3f43 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:46 +0200 Subject: [PATCH 11/52] perf c2c report: Decode c2c_stats for hist entries Decoding and storing c2c_stats for each hist entry. Changing related function to work with c2c_* objects. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-19-git-send-email-jolsa@kernel.org [ Add '.nr_entries = 0' to the c2c_stats initialization to fix the build on older distros ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index df413b564361..43f18aa3367b 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -14,10 +14,12 @@ struct c2c_hists { struct hists hists; struct perf_hpp_list list; + struct c2c_stats stats; }; struct c2c_hist_entry { struct c2c_hists *hists; + struct c2c_stats stats; /* * must be at the end, * because of its callchain dynamic entry @@ -64,9 +66,9 @@ static struct hist_entry_ops c2c_entry_ops = { static int c2c_hists__init(struct c2c_hists *hists, const char *sort); -static struct hists* -he__get_hists(struct hist_entry *he, - const char *sort) +static struct c2c_hists* +he__get_c2c_hists(struct hist_entry *he, + const char *sort) { struct c2c_hist_entry *c2c_he; struct c2c_hists *hists; @@ -74,7 +76,7 @@ he__get_hists(struct hist_entry *he, c2c_he = container_of(he, struct c2c_hist_entry, he); if (c2c_he->hists) - return &c2c_he->hists->hists; + return c2c_he->hists; hists = c2c_he->hists = zalloc(sizeof(*hists)); if (!hists) @@ -86,7 +88,7 @@ he__get_hists(struct hist_entry *he, return NULL; } - return &hists->hists; + return hists; } static int process_sample_event(struct perf_tool *tool __maybe_unused, @@ -95,7 +97,9 @@ static int 
process_sample_event(struct perf_tool *tool __maybe_unused, struct perf_evsel *evsel __maybe_unused, struct machine *machine) { - struct hists *hists = &c2c.hists.hists; + struct c2c_hists *c2c_hists = &c2c.hists; + struct c2c_hist_entry *c2c_he; + struct c2c_stats stats = { .nr_entries = 0, }; struct hist_entry *he; struct addr_location al; struct mem_info *mi, *mi_dup; @@ -115,13 +119,19 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (!mi_dup) goto free_mi; - he = hists__add_entry_ops(hists, &c2c_entry_ops, + c2c_decode_stats(&stats, mi); + + he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops, &al, NULL, NULL, mi, sample, true); if (he == NULL) goto free_mi_dup; - hists__inc_nr_samples(hists, he->filtered); + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_add_stats(&c2c_he->stats, &stats); + c2c_add_stats(&c2c_hists->stats, &stats); + + hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); if (!ret) { @@ -131,17 +141,21 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (!mi_dup) goto free_mi; - hists = he__get_hists(he, "offset"); - if (!hists) + c2c_hists = he__get_c2c_hists(he, "offset"); + if (!c2c_hists) goto free_mi_dup; - he = hists__add_entry_ops(hists, &c2c_entry_ops, + he = hists__add_entry_ops(&c2c_hists->hists, &c2c_entry_ops, &al, NULL, NULL, mi, sample, true); if (he == NULL) goto free_mi_dup; - hists__inc_nr_samples(hists, he->filtered); + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_add_stats(&c2c_he->stats, &stats); + c2c_add_stats(&c2c_hists->stats, &stats); + + hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); } From 600a8cf45b797ff189c42175c1f165fb5cb9479a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:47 +0200 Subject: [PATCH 12/52] perf c2c report: Add header macros Adding helping macros to define header objects. 
It will be used in following patches, that add new dimensions. The c2c report will support 2 line headers, hence we only define line[0/1] in macros. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-20-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 43f18aa3367b..78addc42c9e5 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -273,6 +273,41 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, text); } +#define HEADER_LOW(__h) \ + { \ + .line[1] = { \ + .text = __h, \ + }, \ + } + +#define HEADER_BOTH(__h0, __h1) \ + { \ + .line[0] = { \ + .text = __h0, \ + }, \ + .line[1] = { \ + .text = __h1, \ + }, \ + } + +#define HEADER_SPAN(__h0, __h1, __s) \ + { \ + .line[0] = { \ + .text = __h0, \ + .span = __s, \ + }, \ + .line[1] = { \ + .text = __h1, \ + }, \ + } + +#define HEADER_SPAN_LOW(__h) \ + { \ + .line[1] = { \ + .text = __h, \ + }, \ + } + static struct c2c_dimension *dimensions[] = { NULL, }; From cbb88500a7698bbe8751f01222081fa7f0641fd9 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 22 Sep 2016 17:36:48 +0200 Subject: [PATCH 13/52] perf c2c report: Add 'dcacheline' dimension key It displays cacheline address as hex number. Using c2c wrapper to standard 'dcacheline' object to define its own header and simple (just address) cacheline output.
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1474558645-19956-21-git-send-email-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 78addc42c9e5..3a3e67f6e772 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1,5 +1,6 @@ #include #include +#include #include "util.h" #include "debug.h" #include "builtin.h" @@ -7,6 +8,7 @@ #include "mem-events.h" #include "session.h" #include "hist.h" +#include "sort.h" #include "tool.h" #include "data.h" #include "sort.h" @@ -273,6 +275,32 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, text); } +#define HEX_STR(__s, __v) \ +({ \ + scnprintf(__s, sizeof(__s), "0x%" PRIx64, __v); \ + __s; \ +}) + +static int64_t +dcacheline_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return sort__dcacheline_cmp(left, right); +} + +static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + uint64_t addr = 0; + int width = c2c_width(fmt, hpp, he->hists); + char buf[20]; + + if (he->mem_info) + addr = cl_address(he->mem_info->daddr.addr); + + return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -308,7 +336,16 @@ static int c2c_header(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, }, \ } +static struct c2c_dimension dim_dcacheline = { + .header = HEADER_LOW("Cacheline"), + .name = "dcacheline", + .cmp = dcacheline_cmp, + .entry = dcacheline_entry, + .width = 18, +}; + static struct c2c_dimension *dimensions[] = { + &dim_dcacheline, NULL, }; From 48acdebdc328a9776017111761cd9051fe9d63ff Mon Sep 17 
00:00:00 2001 From: Jiri Olsa Date: Fri, 29 Apr 2016 14:37:06 +0200 Subject: [PATCH 14/52] perf c2c report: Add 'offset' dimension key It displays cacheline offset as hex number. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-m0424ye98lqveg5nopto8qww@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 3a3e67f6e772..01d541fd7022 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -301,6 +301,33 @@ static int dcacheline_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); } +static int offset_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + uint64_t addr = 0; + int width = c2c_width(fmt, hpp, he->hists); + char buf[20]; + + if (he->mem_info) + addr = cl_offset(he->mem_info->daddr.al_addr); + + return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); +} + +static int64_t +offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + uint64_t l = 0, r = 0; + + if (left->mem_info) + l = cl_offset(left->mem_info->daddr.addr); + if (right->mem_info) + r = cl_offset(right->mem_info->daddr.addr); + + return (int64_t)(r - l); +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -344,8 +371,17 @@ static struct c2c_dimension dim_dcacheline = { .width = 18, }; +static struct c2c_dimension dim_offset = { + .header = HEADER_BOTH("Data address", "Offset"), + .name = "offset", + .cmp = offset_cmp, + .entry = offset_entry, + .width = 18, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, + &dim_offset, NULL, }; From 43575a95207a88e2f8ccb5e4130a808e325816a7 Mon Sep 17 00:00:00 2001 From: Jiri 
Olsa Date: Tue, 3 May 2016 21:48:56 +0200 Subject: [PATCH 15/52] perf c2c report: Add 'iaddr' dimension key It displays the code address (as hex number) responsible for the accesses. Using c2c wrapper to standard 'symbol_iaddr' object to define own header and simple (just address) code address output. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-rhshygbst6kr75kju0muwt5x@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 01d541fd7022..bb9d01874836 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -328,6 +328,27 @@ offset_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return (int64_t)(r - l); } +static int +iaddr_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + uint64_t addr = 0; + int width = c2c_width(fmt, hpp, he->hists); + char buf[20]; + + if (he->mem_info) + addr = he->mem_info->iaddr.addr; + + return scnprintf(hpp->buf, hpp->size, "%*s", width, HEX_STR(buf, addr)); +} + +static int64_t +iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return sort__iaddr_cmp(left, right); +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -379,9 +400,18 @@ static struct c2c_dimension dim_offset = { .width = 18, }; +static struct c2c_dimension dim_iaddr = { + .header = HEADER_LOW("Code address"), + .name = "iaddr", + .cmp = iaddr_cmp, + .entry = iaddr_entry, + .width = 18, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, + &dim_iaddr, NULL, }; From 97cb486e497a3f967a5644d40bc854904a0bbffb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 23 May 2016 16:20:14 +0200 Subject: [PATCH 16/52] perf c2c report: Add hitm related dimension keys Adding 5 hitm 
related dimension key wrappers. First 3 are to be displayed in the main cachelines overall output: tot_hitm, lcl_hitm, rmt_hitm The latter 2 are to be displayed within single cacheline output: cl_rmt_hitm, cl_lcl_hitm They all display bare numbers of remote/local/total HITMs for cacheline or its related offsets. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-iju5239xa5heqqben65g1u7e@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 109 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index bb9d01874836..039e7369dc6c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -349,6 +349,70 @@ iaddr_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return sort__iaddr_cmp(left, right); } +static int +tot_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + unsigned int tot_hitm; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + tot_hitm = c2c_he->stats.lcl_hitm + c2c_he->stats.rmt_hitm; + + return scnprintf(hpp->buf, hpp->size, "%*u", width, tot_hitm); +} + +static int64_t +tot_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + unsigned int tot_hitm_left; + unsigned int tot_hitm_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + tot_hitm_left = c2c_left->stats.lcl_hitm + c2c_left->stats.rmt_hitm; + tot_hitm_right = c2c_right->stats.lcl_hitm + c2c_right->stats.rmt_hitm; + + return tot_hitm_left - tot_hitm_right; +} + +#define STAT_FN_ENTRY(__f) \ +static int \ +__f ## _entry(struct perf_hpp_fmt *fmt, 
struct perf_hpp *hpp, \ + struct hist_entry *he) \ +{ \ + struct c2c_hist_entry *c2c_he; \ + int width = c2c_width(fmt, hpp, he->hists); \ + \ + c2c_he = container_of(he, struct c2c_hist_entry, he); \ + return scnprintf(hpp->buf, hpp->size, "%*u", width, \ + c2c_he->stats.__f); \ +} + +#define STAT_FN_CMP(__f) \ +static int64_t \ +__f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ + struct hist_entry *left, struct hist_entry *right) \ +{ \ + struct c2c_hist_entry *c2c_left, *c2c_right; \ + \ + c2c_left = container_of(left, struct c2c_hist_entry, he); \ + c2c_right = container_of(right, struct c2c_hist_entry, he); \ + return c2c_left->stats.__f - c2c_right->stats.__f; \ +} + +#define STAT_FN(__f) \ + STAT_FN_ENTRY(__f) \ + STAT_FN_CMP(__f) + +STAT_FN(rmt_hitm) +STAT_FN(lcl_hitm) + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -408,10 +472,55 @@ static struct c2c_dimension dim_iaddr = { .width = 18, }; +static struct c2c_dimension dim_tot_hitm = { + .header = HEADER_SPAN("----- LLC Load Hitm -----", "Total", 2), + .name = "tot_hitm", + .cmp = tot_hitm_cmp, + .entry = tot_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_lcl_hitm = { + .header = HEADER_SPAN_LOW("Lcl"), + .name = "lcl_hitm", + .cmp = lcl_hitm_cmp, + .entry = lcl_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_rmt_hitm = { + .header = HEADER_SPAN_LOW("Rmt"), + .name = "rmt_hitm", + .cmp = rmt_hitm_cmp, + .entry = rmt_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_rmt_hitm = { + .header = HEADER_SPAN("----- HITM -----", "Rmt", 1), + .name = "cl_rmt_hitm", + .cmp = rmt_hitm_cmp, + .entry = rmt_hitm_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_lcl_hitm = { + .header = HEADER_SPAN_LOW("Lcl"), + .name = "cl_lcl_hitm", + .cmp = lcl_hitm_cmp, + .entry = lcl_hitm_entry, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, &dim_iaddr, + &dim_tot_hitm, + &dim_lcl_hitm, + &dim_rmt_hitm, + 
&dim_cl_lcl_hitm, + &dim_cl_rmt_hitm, NULL, }; From 0f18896de4e1e833188be2c7086817574df205a5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 May 2016 10:10:11 +0200 Subject: [PATCH 17/52] perf c2c report: Add stores related dimension keys Add 5 stores related dimension key wrappers. First 3 are to be displayed in the main cachelines overall output: stores, stores_l1hit, stores_l1miss The latter 2 are to be displayed within single cacheline output: cl_stores_l1hit, cl_stores_l1miss They all display bare numbers of stores for cacheline or its related offsets. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-qeml8v53v6q3wl5n8vgbf64r@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 48 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 039e7369dc6c..bfa0be398d5d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -412,6 +412,9 @@ __f ## _cmp(struct perf_hpp_fmt *fmt __maybe_unused, \ STAT_FN(rmt_hitm) STAT_FN(lcl_hitm) +STAT_FN(store) +STAT_FN(st_l1hit) +STAT_FN(st_l1miss) #define HEADER_LOW(__h) \ { \ @@ -512,6 +515,46 @@ static struct c2c_dimension dim_cl_lcl_hitm = { .width = 7, }; +static struct c2c_dimension dim_stores = { + .header = HEADER_SPAN("---- Store Reference ----", "Total", 2), + .name = "stores", + .cmp = store_cmp, + .entry = store_entry, + .width = 7, +}; + +static struct c2c_dimension dim_stores_l1hit = { + .header = HEADER_SPAN_LOW("L1Hit"), + .name = "stores_l1hit", + .cmp = st_l1hit_cmp, + .entry = st_l1hit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_stores_l1miss = { + .header = HEADER_SPAN_LOW("L1Miss"), + .name = "stores_l1miss", + .cmp = st_l1miss_cmp, + .entry = st_l1miss_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_stores_l1hit = { + .header = 
HEADER_SPAN("-- Store Refs --", "L1 Hit", 1), + .name = "cl_stores_l1hit", + .cmp = st_l1hit_cmp, + .entry = st_l1hit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_cl_stores_l1miss = { + .header = HEADER_SPAN_LOW("L1 Miss"), + .name = "cl_stores_l1miss", + .cmp = st_l1miss_cmp, + .entry = st_l1miss_entry, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -521,6 +564,11 @@ static struct c2c_dimension *dimensions[] = { &dim_rmt_hitm, &dim_cl_lcl_hitm, &dim_cl_rmt_hitm, + &dim_stores, + &dim_stores_l1hit, + &dim_stores_l1miss, + &dim_cl_stores_l1hit, + &dim_cl_stores_l1miss, NULL, }; From 1295f6854095d0f9537afb99516b6b30b208d227 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 May 2016 10:18:24 +0200 Subject: [PATCH 18/52] perf c2c report: Add loads related dimension keys Add 3 loads related dimension key wrappers. They are to be displayed in the main cachelines overall output: ld_fbhit, ld_l1hit, ld_l2hit They all display bare numbers of loads for FB (Fill Buffer), L1 and L2 cache. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wxrzhy74zl8fvkvgjae3w1ju@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index bfa0be398d5d..2b9d24fdcaee 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -415,6 +415,9 @@ STAT_FN(lcl_hitm) STAT_FN(store) STAT_FN(st_l1hit) STAT_FN(st_l1miss) +STAT_FN(ld_fbhit) +STAT_FN(ld_l1hit) +STAT_FN(ld_l2hit) #define HEADER_LOW(__h) \ { \ @@ -555,6 +558,30 @@ static struct c2c_dimension dim_cl_stores_l1miss = { .width = 7, }; +static struct c2c_dimension dim_ld_fbhit = { + .header = HEADER_SPAN("----- Core Load Hit -----", "FB", 2), + .name = "ld_fbhit", + .cmp = ld_fbhit_cmp, + .entry = ld_fbhit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_ld_l1hit = { + .header = HEADER_SPAN_LOW("L1"), + .name = "ld_l1hit", + .cmp = ld_l1hit_cmp, + .entry = ld_l1hit_entry, + .width = 7, +}; + +static struct c2c_dimension dim_ld_l2hit = { + .header = HEADER_SPAN_LOW("L2"), + .name = "ld_l2hit", + .cmp = ld_l2hit_cmp, + .entry = ld_l2hit_entry, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -569,6 +596,9 @@ static struct c2c_dimension *dimensions[] = { &dim_stores_l1miss, &dim_cl_stores_l1hit, &dim_cl_stores_l1miss, + &dim_ld_fbhit, + &dim_ld_l1hit, + &dim_ld_l2hit, NULL, }; From 4d08910c94c5b460c1e0623b47ec2abc0ab0d1a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 May 2016 10:27:51 +0200 Subject: [PATCH 19/52] perf c2c report: Add llc and remote loads related dimension keys Add 2 LLC load related dimension key wrappers. They are to be displayed in the main cachelines overall output: ld_lclhit, ld_rmthit They display bare numbers of LLC and remote loads for cacheline. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ahjg0voaufefboemjuj9yefh@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2b9d24fdcaee..6b601836b031 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -418,6 +418,8 @@ STAT_FN(st_l1miss) STAT_FN(ld_fbhit) STAT_FN(ld_l1hit) STAT_FN(ld_l2hit) +STAT_FN(ld_llchit) +STAT_FN(rmt_hit) #define HEADER_LOW(__h) \ { \ @@ -582,6 +584,22 @@ static struct c2c_dimension dim_ld_l2hit = { .width = 7, }; +static struct c2c_dimension dim_ld_llchit = { + .header = HEADER_SPAN("-- LLC Load Hit --", "Llc", 1), + .name = "ld_lclhit", + .cmp = ld_llchit_cmp, + .entry = ld_llchit_entry, + .width = 8, +}; + +static struct c2c_dimension dim_ld_rmthit = { + .header = HEADER_SPAN_LOW("Rmt"), + .name = "ld_rmthit", + .cmp = rmt_hit_cmp, + .entry = rmt_hit_entry, + .width = 8, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -599,6 +617,8 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_fbhit, &dim_ld_l1hit, &dim_ld_l2hit, + &dim_ld_llchit, + &dim_ld_rmthit, NULL, }; From 04402d205a40d38d1ebf4b48fb369bba39d4e05b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 19 May 2016 10:10:51 +0200 Subject: [PATCH 20/52] perf c2c report: Add llc load miss dimension key It is to be displayed in the main cachelines overall output: ld_llcmiss It displays bare number of LLC misses for cacheline. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wojujik7zzen770mxn295mxa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 47 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 6b601836b031..f525384dbbad 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -421,6 +421,44 @@ STAT_FN(ld_l2hit) STAT_FN(ld_llchit) STAT_FN(rmt_hit) +static uint64_t llc_miss(struct c2c_stats *stats) +{ + uint64_t llcmiss; + + llcmiss = stats->lcl_dram + + stats->rmt_dram + + stats->rmt_hitm + + stats->rmt_hit; + + return llcmiss; +} + +static int +ld_llcmiss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + return scnprintf(hpp->buf, hpp->size, "%*lu", width, + llc_miss(&c2c_he->stats)); +} + +static int64_t +ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -600,6 +638,14 @@ static struct c2c_dimension dim_ld_rmthit = { .width = 8, }; +static struct c2c_dimension dim_ld_llcmiss = { + .header = HEADER_BOTH("LLC", "Ld Miss"), + .name = "ld_llcmiss", + .cmp = ld_llcmiss_cmp, + .entry = ld_llcmiss_entry, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -619,6 +665,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_l2hit, &dim_ld_llchit, 
&dim_ld_rmthit, + &dim_ld_llcmiss, NULL, }; From 01b84d76928d580da4c9fc690d63f73d4ab2eae2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 May 2016 10:35:29 +0200 Subject: [PATCH 21/52] perf c2c report: Add total record sort key It is to be displayed in the main cachelines overall output: tot_recs It displays the sum of all cacheline accesses. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-wojujik7zzen770mxn295mxa@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 64 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f525384dbbad..ff5b5b81d333 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -459,6 +459,61 @@ ld_llcmiss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return llc_miss(&c2c_left->stats) - llc_miss(&c2c_right->stats); } +static uint64_t total_records(struct c2c_stats *stats) +{ + uint64_t lclmiss, ldcnt, total; + + lclmiss = stats->lcl_dram + + stats->rmt_dram + + stats->rmt_hitm + + stats->rmt_hit; + + ldcnt = lclmiss + + stats->ld_fbhit + + stats->ld_l1hit + + stats->ld_l2hit + + stats->ld_llchit + + stats->lcl_hitm; + + total = ldcnt + + stats->st_l1hit + + stats->st_l1miss; + + return total; +} + +static int +tot_recs_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + uint64_t tot_recs; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + tot_recs = total_records(&c2c_he->stats); + + return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs); +} + +static int64_t +tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + uint64_t tot_recs_left; 
+ uint64_t tot_recs_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + tot_recs_left = total_records(&c2c_left->stats); + tot_recs_right = total_records(&c2c_right->stats); + + return tot_recs_left - tot_recs_right; +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -646,6 +701,14 @@ static struct c2c_dimension dim_ld_llcmiss = { .width = 7, }; +static struct c2c_dimension dim_tot_recs = { + .header = HEADER_BOTH("Total", "records"), + .name = "tot_recs", + .cmp = tot_recs_cmp, + .entry = tot_recs_entry, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -666,6 +729,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_llchit, &dim_ld_rmthit, &dim_ld_llcmiss, + &dim_tot_recs, NULL, }; From 55177c4ea6696332e2de44370eed3a62d7fceb67 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 19 May 2016 09:52:37 +0200 Subject: [PATCH 22/52] perf c2c report: Add total loads sort key It is to be displayed in the main cachelines overall output: tot_loads It displays sum of all load accesses for cacheline. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-czd17qsh5u5z0yc1estz9l2y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 60 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 60 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ff5b5b81d333..2411fe025bc7 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -514,6 +514,57 @@ tot_recs_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return tot_recs_left - tot_recs_right; } +static uint64_t total_loads(struct c2c_stats *stats) +{ + uint64_t lclmiss, ldcnt; + + lclmiss = stats->lcl_dram + + stats->rmt_dram + + stats->rmt_hitm + + stats->rmt_hit; + + ldcnt = lclmiss + + stats->ld_fbhit + + stats->ld_l1hit + + stats->ld_l2hit + + stats->ld_llchit + + stats->lcl_hitm; + + return ldcnt; +} + +static int +tot_loads_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + uint64_t tot_recs; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + tot_recs = total_loads(&c2c_he->stats); + + return scnprintf(hpp->buf, hpp->size, "%*" PRIu64, width, tot_recs); +} + +static int64_t +tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + uint64_t tot_recs_left; + uint64_t tot_recs_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + tot_recs_left = total_loads(&c2c_left->stats); + tot_recs_right = total_loads(&c2c_right->stats); + + return tot_recs_left - tot_recs_right; +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -709,6 +760,14 @@ static struct c2c_dimension dim_tot_recs = { .width = 7, }; +static struct 
c2c_dimension dim_tot_loads = { + .header = HEADER_BOTH("Total", "Loads"), + .name = "tot_loads", + .cmp = tot_loads_cmp, + .entry = tot_loads_entry, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -730,6 +789,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_rmthit, &dim_ld_llcmiss, &dim_tot_recs, + &dim_tot_loads, NULL, }; From f0c50c15934e6300fca6fe2d1921dbd12a1fdf1c Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 May 2016 10:50:09 +0200 Subject: [PATCH 23/52] perf c2c report: Add hitm percent sort key It is to be displayed in the main cachelines overall output: percent_hitm It displays HITMs percentage for cacheline. It counts remote HITMs at the moment, but it is changed later to support local as well, based on the sort configuration. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-czd17qsh5u5z0yc1estz9l2y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 91 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2411fe025bc7..dd356d88285c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -565,6 +565,87 @@ tot_loads_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return tot_recs_left - tot_recs_right; } +typedef double (get_percent_cb)(struct c2c_hist_entry *); + +static int +percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, get_percent_cb get_percent) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + double per; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + per = get_percent(c2c_he); + + return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per); +} + +static double percent_hitm(struct c2c_hist_entry *c2c_he) +{ + struct c2c_hists *hists; + struct c2c_stats 
*stats; + struct c2c_stats *total; + int tot, st; + double p; + + hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); + stats = &c2c_he->stats; + total = &hists->stats; + + st = stats->rmt_hitm; + tot = total->rmt_hitm; + + p = tot ? (double) st / tot : 0; + + return 100 * p; +} + +#define PERC_STR(__s, __v) \ +({ \ + scnprintf(__s, sizeof(__s), "%.2F%%", __v); \ + __s; \ +}) + +static int +percent_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + double per; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + per = percent_hitm(c2c_he); + return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per)); +} + +static int +percent_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_hitm); +} + +static int64_t +percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + struct c2c_hist_entry *c2c_left; + struct c2c_hist_entry *c2c_right; + double per_left; + double per_right; + + c2c_left = container_of(left, struct c2c_hist_entry, he); + c2c_right = container_of(right, struct c2c_hist_entry, he); + + per_left = percent_hitm(c2c_left); + per_right = percent_hitm(c2c_right); + + return per_left - per_right; +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -768,6 +849,15 @@ static struct c2c_dimension dim_tot_loads = { .width = 7, }; +static struct c2c_dimension dim_percent_hitm = { + .header = HEADER_LOW("%hitm"), + .name = "percent_hitm", + .cmp = percent_hitm_cmp, + .entry = percent_hitm_entry, + .color = percent_hitm_color, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -790,6 +880,7 @@ static struct c2c_dimension *dimensions[] = { &dim_ld_llcmiss, &dim_tot_recs, &dim_tot_loads, + &dim_percent_hitm, NULL, }; From 
9cb3500afc0980c5d6d44100d9c8217241e260c2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 4 May 2016 12:16:50 +0200 Subject: [PATCH 24/52] perf c2c report: Add hitm/store percent related sort keys They are to be displayed in the single cacheline output: percent_rmt_hitm, percent_lcl_hitm, percent_stores_l1hit, percent_stores_l1miss They display percentage of HITMs/stores for specific offset in the cacheline. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-t365aosxtdut8sgrgn8mfoe4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 202 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 202 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index dd356d88285c..bf4859fecc19 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -646,6 +646,167 @@ percent_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return per_left - per_right; } +static struct c2c_stats *he_stats(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + return &c2c_he->stats; +} + +static struct c2c_stats *total_stats(struct hist_entry *he) +{ + struct c2c_hists *hists; + + hists = container_of(he->hists, struct c2c_hists, hists); + return &hists->stats; +} + +static double percent(int st, int tot) +{ + return tot ? 100. 
* (double) st / (double) tot : 0; +} + +#define PERCENT(__h, __f) percent(he_stats(__h)->__f, total_stats(__h)->__f) + +#define PERCENT_FN(__f) \ +static double percent_ ## __f(struct c2c_hist_entry *c2c_he) \ +{ \ + struct c2c_hists *hists; \ + \ + hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); \ + return percent(c2c_he->stats.__f, hists->stats.__f); \ +} + +PERCENT_FN(rmt_hitm) +PERCENT_FN(lcl_hitm) +PERCENT_FN(st_l1hit) +PERCENT_FN(st_l1miss) + +static int +percent_rmt_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + double per = PERCENT(he, rmt_hitm); + char buf[10]; + + return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per)); +} + +static int +percent_rmt_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_rmt_hitm); +} + +static int64_t +percent_rmt_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + double per_left; + double per_right; + + per_left = PERCENT(left, lcl_hitm); + per_right = PERCENT(right, lcl_hitm); + + return per_left - per_right; +} + +static int +percent_lcl_hitm_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + double per = PERCENT(he, lcl_hitm); + char buf[10]; + + return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per)); +} + +static int +percent_lcl_hitm_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_lcl_hitm); +} + +static int64_t +percent_lcl_hitm_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + double per_left; + double per_right; + + per_left = PERCENT(left, lcl_hitm); + per_right = PERCENT(right, lcl_hitm); + + return per_left - per_right; +} + +static int 
+percent_stores_l1hit_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + double per = PERCENT(he, st_l1hit); + char buf[10]; + + return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per)); +} + +static int +percent_stores_l1hit_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_st_l1hit); +} + +static int64_t +percent_stores_l1hit_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + double per_left; + double per_right; + + per_left = PERCENT(left, st_l1hit); + per_right = PERCENT(right, st_l1hit); + + return per_left - per_right; +} + +static int +percent_stores_l1miss_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + double per = PERCENT(he, st_l1miss); + char buf[10]; + + return scnprintf(hpp->buf, hpp->size, "%*s", width, PERC_STR(buf, per)); +} + +static int +percent_stores_l1miss_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + return percent_color(fmt, hpp, he, percent_st_l1miss); +} + +static int64_t +percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + double per_left; + double per_right; + + per_left = PERCENT(left, st_l1miss); + per_right = PERCENT(right, st_l1miss); + + return per_left - per_right; +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -858,6 +1019,42 @@ static struct c2c_dimension dim_percent_hitm = { .width = 7, }; +static struct c2c_dimension dim_percent_rmt_hitm = { + .header = HEADER_SPAN("----- HITM -----", "Rmt", 1), + .name = "percent_rmt_hitm", + .cmp = percent_rmt_hitm_cmp, + .entry = percent_rmt_hitm_entry, + .color = percent_rmt_hitm_color, + .width = 7, +}; + +static struct c2c_dimension dim_percent_lcl_hitm = { + .header = 
HEADER_SPAN_LOW("Lcl"), + .name = "percent_lcl_hitm", + .cmp = percent_lcl_hitm_cmp, + .entry = percent_lcl_hitm_entry, + .color = percent_lcl_hitm_color, + .width = 7, +}; + +static struct c2c_dimension dim_percent_stores_l1hit = { + .header = HEADER_SPAN("-- Store Refs --", "L1 Hit", 1), + .name = "percent_stores_l1hit", + .cmp = percent_stores_l1hit_cmp, + .entry = percent_stores_l1hit_entry, + .color = percent_stores_l1hit_color, + .width = 7, +}; + +static struct c2c_dimension dim_percent_stores_l1miss = { + .header = HEADER_SPAN_LOW("L1 Miss"), + .name = "percent_stores_l1miss", + .cmp = percent_stores_l1miss_cmp, + .entry = percent_stores_l1miss_entry, + .color = percent_stores_l1miss_color, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -881,6 +1078,10 @@ static struct c2c_dimension *dimensions[] = { &dim_tot_recs, &dim_tot_loads, &dim_percent_hitm, + &dim_percent_rmt_hitm, + &dim_percent_lcl_hitm, + &dim_percent_stores_l1hit, + &dim_percent_stores_l1miss, NULL, }; @@ -968,6 +1169,7 @@ static struct c2c_fmt *get_format(const char *name) fmt->cmp = dim->se ? c2c_se_cmp : dim->cmp; fmt->sort = dim->se ? c2c_se_cmp : dim->cmp; + fmt->color = dim->se ? NULL : dim->color; fmt->entry = dim->se ? c2c_se_entry : dim->entry; fmt->header = c2c_header; fmt->width = c2c_width; From 6c70f54cf7dcd1cfd7a2f6bf179bd36eafc14a61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 28 May 2016 12:30:13 +0200 Subject: [PATCH 25/52] perf c2c report: Add dram related sort keys They are to be displayed in the main cachelines overall output: dram_lcl, dram_rmt They display DRAM rmt/lcl access numbers for specific cacheline. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-tl3qqi9ehk6g1fla4z7y0ykd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index bf4859fecc19..c718d8b553ce 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -807,6 +807,9 @@ percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return per_left - per_right; } +STAT_FN(lcl_dram) +STAT_FN(rmt_dram) + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -1055,6 +1058,22 @@ static struct c2c_dimension dim_percent_stores_l1miss = { .width = 7, }; +static struct c2c_dimension dim_dram_lcl = { + .header = HEADER_SPAN("--- Load Dram ----", "Lcl", 1), + .name = "dram_lcl", + .cmp = lcl_dram_cmp, + .entry = lcl_dram_entry, + .width = 8, +}; + +static struct c2c_dimension dim_dram_rmt = { + .header = HEADER_SPAN_LOW("Rmt"), + .name = "dram_rmt", + .cmp = rmt_dram_cmp, + .entry = rmt_dram_entry, + .width = 8, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1082,6 +1101,8 @@ static struct c2c_dimension *dimensions[] = { &dim_percent_lcl_hitm, &dim_percent_stores_l1hit, &dim_percent_stores_l1miss, + &dim_dram_lcl, + &dim_dram_rmt, NULL, }; From 36d3deb9d6d7f5f3b0b1dbb3398c413e4521af64 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2016 13:09:47 +0200 Subject: [PATCH 26/52] perf c2c report: Add 'pid' sort key It is to be displayed in the single cacheline output: pid We currently don't have a single 'pid' sort/display entry, which would output just pid number, hence adding it into c2c code. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-3o23qrspxc99b04ci1swlzr6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c718d8b553ce..4795713ad211 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -810,6 +810,22 @@ percent_stores_l1miss_cmp(struct perf_hpp_fmt *fmt __maybe_unused, STAT_FN(lcl_dram) STAT_FN(rmt_dram) +static int +pid_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + + return scnprintf(hpp->buf, hpp->size, "%*d", width, he->thread->pid_); +} + +static int64_t +pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left, struct hist_entry *right) +{ + return left->thread->pid_ - right->thread->pid_; +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -1074,6 +1090,14 @@ static struct c2c_dimension dim_dram_rmt = { .width = 8, }; +static struct c2c_dimension dim_pid = { + .header = HEADER_LOW("Pid"), + .name = "pid", + .cmp = pid_cmp, + .entry = pid_entry, + .width = 7, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1103,6 +1127,7 @@ static struct c2c_dimension *dimensions[] = { &dim_percent_stores_l1miss, &dim_dram_lcl, &dim_dram_rmt, + &dim_pid, NULL, }; From e87019c5e8c6bc87e59bb01175479b1ab561c400 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 25 May 2016 08:50:10 +0200 Subject: [PATCH 27/52] perf c2c report: Add 'tid' sort key It is to be displayed in the single cacheline output: tid It's a wrapper for global sort_thread sort entry with c2c specific header. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-fr0socae5skzvz5qbkl85prn@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4795713ad211..77810a30a4cc 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1098,6 +1098,12 @@ static struct c2c_dimension dim_pid = { .width = 7, }; +static struct c2c_dimension dim_tid = { + .header = HEADER_LOW("Tid"), + .name = "tid", + .se = &sort_thread, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1128,6 +1134,7 @@ static struct c2c_dimension *dimensions[] = { &dim_dram_lcl, &dim_dram_rmt, &dim_pid, + &dim_tid, NULL, }; From 51dedaa446532da821fb1160fc4865ca37a54df3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2016 23:41:52 +0200 Subject: [PATCH 28/52] perf c2c report: Add 'symbol' and 'dso' sort keys They are to be displayed in the single cacheline output: symbol, dso They are wrappers for global sort_sym and sort_dso sort entries with c2c specific headers. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrgxx5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 77810a30a4cc..ffd41744886e 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1104,6 +1104,17 @@ static struct c2c_dimension dim_tid = { .se = &sort_thread, }; +static struct c2c_dimension dim_symbol = { + .name = "symbol", + .se = &sort_sym, +}; + +static struct c2c_dimension dim_dso = { + .header = HEADER_BOTH("Shared", "Object"), + .name = "dso", + .se = &sort_dso, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1135,6 +1146,8 @@ static struct c2c_dimension *dimensions[] = { &dim_dram_rmt, &dim_pid, &dim_tid, + &dim_symbol, + &dim_dso, NULL, }; @@ -1249,12 +1262,17 @@ static int c2c_hists__init_output(struct perf_hpp_list *hpp_list, char *name) static int c2c_hists__init_sort(struct perf_hpp_list *hpp_list, char *name) { struct c2c_fmt *c2c_fmt = get_format(name); + struct c2c_dimension *dim; if (!c2c_fmt) { reset_dimensions(); return sort_dimension__add(hpp_list, name, NULL, 0); } + dim = c2c_fmt->dim; + if (dim == &dim_dso) + hpp_list->dso = 1; + perf_hpp_list__register_sort_field(hpp_list, &c2c_fmt->fmt); return 0; } From 1e181b92a2da30ba1f80c61a41cfb9ef02f43b79 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 3 Jun 2016 15:40:28 +0200 Subject: [PATCH 29/52] perf c2c report: Add 'node' sort key It is to be displayed in the single cacheline output: node It displays nodes hits related to cacheline accesses. 
The node field comes in 3 flavors: - node IDs separated by ',' - node IDs with stats for each ID, in the following format: Node{cpus %hitms %stores} - node IDs with list of affected CPUs in the following format: Node{cpu list} User can switch the flavor with -N option (-NN,-NNN). It will be available in TUI to switch this with 'n' key. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-6742e6g0r7n63y5wc4rrgxx5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 219 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ffd41744886e..ca2f37479e6d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1,6 +1,7 @@ #include #include #include +#include #include "util.h" #include "debug.h" #include "builtin.h" @@ -22,6 +23,8 @@ struct c2c_hists { struct c2c_hist_entry { struct c2c_hists *hists; struct c2c_stats stats; + unsigned long *cpuset; + struct c2c_stats *node_stats; /* * must be at the end, * because of its callchain dynamic entry @@ -32,6 +35,12 @@ struct c2c_hist_entry { struct perf_c2c { struct perf_tool tool; struct c2c_hists hists; + + unsigned long **nodes; + int nodes_cnt; + int cpus_cnt; + int *cpu2node; + int node_info; }; static struct perf_c2c c2c; @@ -44,6 +53,14 @@ static void *c2c_he_zalloc(size_t size) if (!c2c_he) return NULL; + c2c_he->cpuset = bitmap_alloc(c2c.cpus_cnt); + if (!c2c_he->cpuset) + return NULL; + + c2c_he->node_stats = zalloc(c2c.nodes_cnt * sizeof(*c2c_he->node_stats)); + if (!c2c_he->node_stats) + return NULL; + return &c2c_he->he; } @@ -57,6 +74,8 @@ static void c2c_he_free(void *he) free(c2c_he->hists); } + free(c2c_he->cpuset); + free(c2c_he->node_stats); free(c2c_he); } @@ -93,6 +112,16 @@ he__get_c2c_hists(struct hist_entry *he, return hists; } +static void c2c_he__set_cpu(struct 
c2c_hist_entry *c2c_he, + struct perf_sample *sample) +{ + if (WARN_ONCE(sample->cpu == (unsigned int) -1, + "WARNING: no sample cpu value")) + return; + + set_bit(sample->cpu, c2c_he->cpuset); +} + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -133,10 +162,23 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_add_stats(&c2c_he->stats, &stats); c2c_add_stats(&c2c_hists->stats, &stats); + c2c_he__set_cpu(c2c_he, sample); + hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); if (!ret) { + /* + * There's already been warning about missing + * sample's cpu value. Let's account all to + * node 0 in this case, without any further + * warning. + * + * Doing node stats only for single callchain data. + */ + int cpu = sample->cpu == (unsigned int) -1 ? 0 : sample->cpu; + int node = c2c.cpu2node[cpu]; + mi = mi_dup; mi_dup = memdup(mi, sizeof(*mi)); @@ -156,6 +198,9 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_add_stats(&c2c_he->stats, &stats); c2c_add_stats(&c2c_hists->stats, &stats); + c2c_add_stats(&c2c_he->node_stats[node], &stats); + + c2c_he__set_cpu(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); ret = hist_entry__append_callchain(he, sample); @@ -826,6 +871,97 @@ pid_cmp(struct perf_hpp_fmt *fmt __maybe_unused, return left->thread->pid_ - right->thread->pid_; } +static int64_t +empty_cmp(struct perf_hpp_fmt *fmt __maybe_unused, + struct hist_entry *left __maybe_unused, + struct hist_entry *right __maybe_unused) +{ + return 0; +} + +static int +node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + bool first = true; + int node; + int ret = 0; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + for (node = 0; 
node < c2c.nodes_cnt; node++) { + DECLARE_BITMAP(set, c2c.cpus_cnt); + + bitmap_zero(set, c2c.cpus_cnt); + bitmap_and(set, c2c_he->cpuset, c2c.nodes[node], c2c.cpus_cnt); + + if (!bitmap_weight(set, c2c.cpus_cnt)) { + if (c2c.node_info == 1) { + ret = scnprintf(hpp->buf, hpp->size, "%21s", " "); + advance_hpp(hpp, ret); + } + continue; + } + + if (!first) { + ret = scnprintf(hpp->buf, hpp->size, " "); + advance_hpp(hpp, ret); + } + + switch (c2c.node_info) { + case 0: + ret = scnprintf(hpp->buf, hpp->size, "%2d", node); + advance_hpp(hpp, ret); + break; + case 1: + { + int num = bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt); + struct c2c_stats *stats = &c2c_he->node_stats[node]; + + ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num); + advance_hpp(hpp, ret); + + + if (c2c_he->stats.rmt_hitm > 0) { + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", + percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm)); + } else { + ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); + } + + advance_hpp(hpp, ret); + + if (c2c_he->stats.store > 0) { + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%%}", + percent(stats->store, c2c_he->stats.store)); + } else { + ret = scnprintf(hpp->buf, hpp->size, "%6s}", "n/a"); + } + + advance_hpp(hpp, ret); + break; + } + case 2: + ret = scnprintf(hpp->buf, hpp->size, "%2d{", node); + advance_hpp(hpp, ret); + + ret = bitmap_scnprintf(set, c2c.cpus_cnt, hpp->buf, hpp->size); + advance_hpp(hpp, ret); + + ret = scnprintf(hpp->buf, hpp->size, "}"); + advance_hpp(hpp, ret); + break; + default: + break; + } + + first = false; + } + + return 0; +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -1115,6 +1251,19 @@ static struct c2c_dimension dim_dso = { .se = &sort_dso, }; +static struct c2c_header header_node[3] = { + HEADER_LOW("Node"), + HEADER_LOW("Node{cpus %hitms %stores}"), + HEADER_LOW("Node{cpu list}"), +}; + +static struct c2c_dimension dim_node = { + .name = "node", + .cmp = empty_cmp, + .entry = node_entry, + .width = 4, +}; + static 
struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1148,6 +1297,7 @@ static struct c2c_dimension *dimensions[] = { &dim_tid, &dim_symbol, &dim_dso, + &dim_node, NULL, }; @@ -1374,6 +1524,68 @@ static int resort_cl_cb(struct hist_entry *he) return 0; } +static void setup_nodes_header(void) +{ + dim_node.header = header_node[c2c.node_info]; +} + +static int setup_nodes(struct perf_session *session) +{ + struct numa_node *n; + unsigned long **nodes; + int node, cpu; + int *cpu2node; + + if (c2c.node_info > 2) + c2c.node_info = 2; + + c2c.nodes_cnt = session->header.env.nr_numa_nodes; + c2c.cpus_cnt = session->header.env.nr_cpus_online; + + n = session->header.env.numa_nodes; + if (!n) + return -EINVAL; + + nodes = zalloc(sizeof(unsigned long *) * c2c.nodes_cnt); + if (!nodes) + return -ENOMEM; + + c2c.nodes = nodes; + + cpu2node = zalloc(sizeof(int) * c2c.cpus_cnt); + if (!cpu2node) + return -ENOMEM; + + for (cpu = 0; cpu < c2c.cpus_cnt; cpu++) + cpu2node[cpu] = -1; + + c2c.cpu2node = cpu2node; + + for (node = 0; node < c2c.nodes_cnt; node++) { + struct cpu_map *map = n[node].map; + unsigned long *set; + + set = bitmap_alloc(c2c.cpus_cnt); + if (!set) + return -ENOMEM; + + for (cpu = 0; cpu < map->nr; cpu++) { + set_bit(map->map[cpu], set); + + if (WARN_ONCE(cpu2node[map->map[cpu]] != -1, "node/cpu topology bug")) + return -EINVAL; + + cpu2node[map->map[cpu]] = node; + } + + nodes[node] = set; + } + + setup_nodes_header(); + return 0; +} + + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -1388,6 +1600,8 @@ static int perf_c2c__report(int argc, const char **argv) "be more verbose (show counter open errors, etc)"), OPT_STRING('i', "input", &input_name, "file", "the input file to process"), + OPT_INCR('N', "node-info", &c2c.node_info, + "show extra node info in report (repeat for more info)"), OPT_END() }; int err = 0; @@ -1413,6 +1627,11 @@ static int perf_c2c__report(int argc, const char **argv) 
pr_debug("No memory for session\n"); goto out; } + err = setup_nodes(session); + if (err) { + pr_err("Failed setup nodes\n"); + goto out; + } if (symbol__init(&session->header.env) < 0) goto out_session; From 92062d543f1fb83b9b03ecafdb92f00e1328a992 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 5 Jun 2016 13:40:53 +0200 Subject: [PATCH 30/52] perf c2c report: Add stats related sort keys It is to be displayed in the single cacheline output: median, mean_rmt, mean_lcl, mean_load, stddev It displays statistics hits related to cacheline accesses. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-m1r4uc9lcykf1jhpvwk2gkj8@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 80 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index ca2f37479e6d..043344a720bf 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -20,11 +20,20 @@ struct c2c_hists { struct c2c_stats stats; }; +struct compute_stats { + struct stats lcl_hitm; + struct stats rmt_hitm; + struct stats load; +}; + struct c2c_hist_entry { struct c2c_hists *hists; struct c2c_stats stats; unsigned long *cpuset; struct c2c_stats *node_stats; + + struct compute_stats cstats; + /* * must be at the end, * because of its callchain dynamic entry @@ -61,6 +70,10 @@ static void *c2c_he_zalloc(size_t size) if (!c2c_he->node_stats) return NULL; + init_stats(&c2c_he->cstats.lcl_hitm); + init_stats(&c2c_he->cstats.rmt_hitm); + init_stats(&c2c_he->cstats.load); + return &c2c_he->he; } @@ -122,6 +135,20 @@ static void c2c_he__set_cpu(struct c2c_hist_entry *c2c_he, set_bit(sample->cpu, c2c_he->cpuset); } +static void compute_stats(struct c2c_hist_entry *c2c_he, + struct c2c_stats *stats, + u64 weight) +{ + struct compute_stats *cstats = &c2c_he->cstats; + + if (stats->rmt_hitm) + 
update_stats(&cstats->rmt_hitm, weight); + else if (stats->lcl_hitm) + update_stats(&cstats->lcl_hitm, weight); + else if (stats->load) + update_stats(&cstats->load, weight); +} + static int process_sample_event(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_sample *sample, @@ -200,6 +227,8 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, c2c_add_stats(&c2c_hists->stats, &stats); c2c_add_stats(&c2c_he->node_stats[node], &stats); + compute_stats(c2c_he, &stats, sample->weight); + c2c_he__set_cpu(c2c_he, sample); hists__inc_nr_samples(&c2c_hists->hists, he->filtered); @@ -962,6 +991,30 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, return 0; } +static int +mean_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, + struct hist_entry *he, double mean) +{ + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + + scnprintf(buf, 10, "%6.0f", mean); + return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +#define MEAN_ENTRY(__func, __val) \ +static int \ +__func(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct hist_entry *he) \ +{ \ + struct c2c_hist_entry *c2c_he; \ + c2c_he = container_of(he, struct c2c_hist_entry, he); \ + return mean_entry(fmt, hpp, he, avg_stats(&c2c_he->cstats.__val)); \ +} + +MEAN_ENTRY(mean_rmt_entry, rmt_hitm); +MEAN_ENTRY(mean_lcl_entry, lcl_hitm); +MEAN_ENTRY(mean_load_entry, load); + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -1264,6 +1317,30 @@ static struct c2c_dimension dim_node = { .width = 4, }; +static struct c2c_dimension dim_mean_rmt = { + .header = HEADER_SPAN("---------- cycles ----------", "rmt hitm", 2), + .name = "mean_rmt", + .cmp = empty_cmp, + .entry = mean_rmt_entry, + .width = 8, +}; + +static struct c2c_dimension dim_mean_lcl = { + .header = HEADER_SPAN_LOW("lcl hitm"), + .name = "mean_lcl", + .cmp = empty_cmp, + .entry = mean_lcl_entry, + .width = 8, +}; + +static struct c2c_dimension dim_mean_load = { + .header 
= HEADER_SPAN_LOW("load"), + .name = "mean_load", + .cmp = empty_cmp, + .entry = mean_load_entry, + .width = 8, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1298,6 +1375,9 @@ static struct c2c_dimension *dimensions[] = { &dim_symbol, &dim_dso, &dim_node, + &dim_mean_rmt, + &dim_mean_lcl, + &dim_mean_load, NULL, }; From b6fe2bbc346ea6a838832d319e5fe7332fc75f3d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 23 Jun 2016 23:05:52 +0200 Subject: [PATCH 31/52] perf c2c report: Add 'cpucnt' sort key It is to be displayed in the single cacheline output: cpucnt It displays number of distinct cpus that hit cacheline. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ib2kdwam52fby9u2k3ij6lhm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 043344a720bf..2a1e883580a1 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1015,6 +1015,20 @@ MEAN_ENTRY(mean_rmt_entry, rmt_hitm); MEAN_ENTRY(mean_lcl_entry, lcl_hitm); MEAN_ENTRY(mean_load_entry, load); +static int +cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + scnprintf(buf, 10, "%d", bitmap_weight(c2c_he->cpuset, c2c.cpus_cnt)); + return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -1341,6 +1355,14 @@ static struct c2c_dimension dim_mean_load = { .width = 8, }; +static struct c2c_dimension dim_cpucnt = { + .header = HEADER_BOTH("cpu", "cnt"), + .name = "cpucnt", + .cmp = empty_cmp, + .entry = cpucnt_entry, + .width = 8, +}; + 
static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1378,6 +1400,7 @@ static struct c2c_dimension *dimensions[] = { &dim_mean_rmt, &dim_mean_lcl, &dim_mean_load, + &dim_cpucnt, NULL, }; From 89d9ba8f5852f38927bb12cd9e62ffb8b1d89ccd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 10 Jul 2016 15:47:40 +0200 Subject: [PATCH 32/52] perf c2c report: Add src line sort key It is to be displayed in the single cacheline output: cl_srcline It displays source line related to the code address that accessed cacheline. It's a wrapper to global srcline sort entry. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-cmnzgm37mjz56ozsg4mnbgxq@git.kernel.org [ Remove __maybe_unused from now used 'he' parameter in filter_cb() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 13 ++++++++++++- tools/perf/util/sort.c | 2 +- tools/perf/util/sort.h | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 2a1e883580a1..0f898d3ff84c 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -50,6 +50,8 @@ struct perf_c2c { int cpus_cnt; int *cpu2node; int node_info; + + bool show_src; }; static struct perf_c2c c2c; @@ -1363,6 +1365,11 @@ static struct c2c_dimension dim_cpucnt = { .width = 8, }; +static struct c2c_dimension dim_srcline = { + .name = "cl_srcline", + .se = &sort_srcline, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1401,6 +1408,7 @@ static struct c2c_dimension *dimensions[] = { &dim_mean_lcl, &dim_mean_load, &dim_cpucnt, + &dim_srcline, NULL, }; @@ -1606,8 +1614,11 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, return hpp_list__parse(&c2c_hists->list, output, sort); } -static int filter_cb(struct hist_entry *he __maybe_unused) +static int filter_cb(struct hist_entry *he) { + if 
(c2c.show_src && !he->srcline) + he->srcline = hist_entry__get_srcline(he); + return 0; } diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 452e15a10dd2..df622f4e301e 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -315,7 +315,7 @@ struct sort_entry sort_sym = { /* --sort srcline */ -static char *hist_entry__get_srcline(struct hist_entry *he) +char *hist_entry__get_srcline(struct hist_entry *he) { struct map *map = he->ms.map; diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 099c97557d33..7aff317fc7c4 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -280,4 +280,5 @@ int64_t sort__daddr_cmp(struct hist_entry *left, struct hist_entry *right); int64_t sort__dcacheline_cmp(struct hist_entry *left, struct hist_entry *right); +char *hist_entry__get_srcline(struct hist_entry *he); #endif /* __PERF_SORT_H */ From 1d62fcd693c0014dca74490a1139be36271f4af6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2016 10:12:31 +0200 Subject: [PATCH 33/52] perf c2c report: Setup number of header lines for hists Allow to setup number of header lines for c2c hists objects. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-4ilsf0ulubrd4y96g7tnpwzk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 0f898d3ff84c..44a85679a704 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -100,11 +100,13 @@ static struct hist_entry_ops c2c_entry_ops = { }; static int c2c_hists__init(struct c2c_hists *hists, - const char *sort); + const char *sort, + int nr_header_lines); static struct c2c_hists* he__get_c2c_hists(struct hist_entry *he, - const char *sort) + const char *sort, + int nr_header_lines) { struct c2c_hist_entry *c2c_he; struct c2c_hists *hists; @@ -118,7 +120,7 @@ he__get_c2c_hists(struct hist_entry *he, if (!hists) return NULL; - ret = c2c_hists__init(hists, sort); + ret = c2c_hists__init(hists, sort, nr_header_lines); if (ret) { free(hists); return NULL; @@ -214,7 +216,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (!mi_dup) goto free_mi; - c2c_hists = he__get_c2c_hists(he, "offset"); + c2c_hists = he__get_c2c_hists(he, "offset", 2); if (!c2c_hists) goto free_mi_dup; @@ -1591,7 +1593,8 @@ static int hpp_list__parse(struct perf_hpp_list *hpp_list, } static int c2c_hists__init(struct c2c_hists *hists, - const char *sort) + const char *sort, + int nr_header_lines) { __hists__init(&hists->hists, &hists->list); @@ -1602,6 +1605,9 @@ static int c2c_hists__init(struct c2c_hists *hists, */ perf_hpp_list__init(&hists->list); + /* Overload number of header lines.*/ + hists->list.nr_header_lines = nr_header_lines; + return hpp_list__parse(&hists->list, NULL, sort); } @@ -1730,7 +1736,7 @@ static int perf_c2c__report(int argc, const char **argv) file.path = input_name; - err = c2c_hists__init(&c2c.hists, "dcacheline"); 
+ err = c2c_hists__init(&c2c.hists, "dcacheline", 2); if (err) { pr_debug("Failed to initialize hists\n"); goto out; From 22dd59d1457408b69a95e2b5487a500f39d3c409 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 10 May 2016 14:08:29 +0200 Subject: [PATCH 34/52] perf c2c report: Set final resort fields Set resort/display fields for both cachelines and single cacheline displays. Cachelines are sorted on: rmt_hitm; this will be made configurable in following patches. The following fields are displayed for cachelines: dcacheline tot_recs percent_hitm tot_hitm,lcl_hitm,rmt_hitm stores,stores_l1hit,stores_l1miss dram_lcl,dram_rmt ld_llcmiss tot_loads ld_fbhit,ld_l1hit,ld_l2hit ld_lclhit,ld_rmthit The single cacheline display is sorted by: offset,rmt_hitm,lcl_hitm; this will be made configurable in following patches. The following fields are displayed for each cacheline: percent_rmt_hitm percent_lcl_hitm percent_stores_l1hit percent_stores_l1miss offset pid tid mean_rmt mean_lcl mean_load cpucnt symbol dso node Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-0rclftliywdq9qr2sjbugb6b@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 44a85679a704..c271261fcaf1 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1637,6 +1637,23 @@ static int resort_cl_cb(struct hist_entry *he) c2c_hists = c2c_he->hists; if (c2c_hists) { + c2c_hists__reinit(c2c_hists, + "percent_rmt_hitm," + "percent_lcl_hitm," + "percent_stores_l1hit," + "percent_stores_l1miss," + "offset," + "pid," + "tid," + "mean_rmt," + "mean_lcl," + "mean_load," + "cpucnt," + "symbol," + "dso," + "node", + "offset,rmt_hitm,lcl_hitm"); + hists__collapse_resort(&c2c_hists->hists, NULL); hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb); } @@ 
-1768,6 +1785,20 @@ static int perf_c2c__report(int argc, const char **argv) goto out_session; } + c2c_hists__reinit(&c2c.hists, + "dcacheline," + "tot_recs," + "percent_hitm," + "tot_hitm,lcl_hitm,rmt_hitm," + "stores,stores_l1hit,stores_l1miss," + "dram_lcl,dram_rmt," + "ld_llcmiss," + "tot_loads," + "ld_fbhit,ld_l1hit,ld_l2hit," + "ld_lclhit,ld_rmthit", + "rmt_hitm" + ); + ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); From 2d388bd0c9d3e3fed9e4abdd9aadf2f07e9cf755 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 3 May 2016 14:32:56 +0200 Subject: [PATCH 35/52] perf c2c report: Add stdio output support Adding the --stdio option output support. The output tables are dumped directly to the stdio. $ perf c2c report ================================================= Shared Data Cache Line Table ================================================= # # Total ----- LLC Load Hitm ----- ---- Store Reference ---- --- Load Dram ---- LLC Total ----- Core Load Hit ----- -- LLC Load Hit -- # Cacheline records %hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1 L2 Llc Rmt # .................. ....... ....... ....... ....... ....... ....... ....... ....... ........ ........ ....... ....... ....... ....... ....... ........ ........ # 0xffff88000235f840 17 0.00% 0 0 0 17 17 0 0 0 0 0 0 0 0 0 0 ... ================================================= Shared Cache Line Distribution Pareto ================================================= # # ----- HITM ----- -- Store Refs -- Data address ---------- cycles ---------- cpu Shared # Rmt Lcl L1 Hit L1 Miss Offset Pid Tid rmt hitm lcl hitm load cnt Symbol Object Node # ....... ....... ....... ....... .................. ....... ..................... ........ ........ ........ ........ .................... ................. .... 
# ------------------------------------------------------ 0 0 17 0 0xffff88000235f840 ------------------------------------------------------ 0.00% 0.00% 5.88% 0.00% 0x0 11474 11474:kworker/u16:5 0 0 0 1 [k] rmap_walk_file [kernel.kallsyms] 0 0.00% 0.00% 5.88% 0.00% 0x10 11474 11474:kworker/u16:5 0 0 0 1 [k] lock_page_memcg [kernel.kallsyms] 0 0.00% 0.00% 11.76% 0.00% 0x20 11474 11474:kworker/u16:5 0 0 0 1 [k] page_mapping [kernel.kallsyms] 0 0.00% 0.00% 64.71% 0.00% 0x28 11474 11474:kworker/u16:5 0 0 0 1 [k] __test_set_page_writeback [kernel.kallsyms] 0 0.00% 0.00% 11.76% 0.00% 0x30 11474 11474:kworker/u16:5 0 0 0 1 [k] page_mapped [kernel.kallsyms] 0 ... Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-eorco9r0oeesjve77pkkg43s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 83 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c271261fcaf1..33db26c6ca63 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -13,6 +13,7 @@ #include "tool.h" #include "data.h" #include "sort.h" +#include struct c2c_hists { struct hists hists; @@ -1722,6 +1723,85 @@ static int setup_nodes(struct perf_session *session) return 0; } +static void print_cacheline(struct c2c_hists *c2c_hists, + struct hist_entry *he_cl, + struct perf_hpp_list *hpp_list, + FILE *out) +{ + char bf[1000]; + struct perf_hpp hpp = { + .buf = bf, + .size = 1000, + }; + static bool once; + + if (!once) { + hists__fprintf_headers(&c2c_hists->hists, out); + once = true; + } else { + fprintf(out, "\n"); + } + + fprintf(out, " ------------------------------------------------------\n"); + __hist_entry__snprintf(he_cl, &hpp, hpp_list); + fprintf(out, "%s\n", bf); + fprintf(out, " 
------------------------------------------------------\n"); + + hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true); +} + +static void print_pareto(FILE *out) +{ + struct perf_hpp_list hpp_list; + struct rb_node *nd; + int ret; + + perf_hpp_list__init(&hpp_list); + ret = hpp_list__parse(&hpp_list, + "cl_rmt_hitm," + "cl_lcl_hitm," + "cl_stores_l1hit," + "cl_stores_l1miss," + "dcacheline", + NULL); + + if (WARN_ONCE(ret, "failed to setup sort entries\n")) + return; + + nd = rb_first(&c2c.hists.hists.entries); + + for (; nd; nd = rb_next(nd)) { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + struct c2c_hist_entry *c2c_he; + + if (he->filtered) + continue; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + print_cacheline(c2c_he->hists, he, &hpp_list, out); + } +} + +static void perf_c2c__hists_fprintf(FILE *out) +{ + setup_pager(); + + fprintf(out, "\n"); + fprintf(out, "=================================================\n"); + fprintf(out, " Shared Data Cache Line Table \n"); + fprintf(out, "=================================================\n"); + fprintf(out, "#\n"); + + hists__fprintf(&c2c.hists.hists, true, 0, 0, 0, stdout, false); + + fprintf(out, "\n"); + fprintf(out, "=================================================\n"); + fprintf(out, " Shared Cache Line Distribution Pareto \n"); + fprintf(out, "=================================================\n"); + fprintf(out, "#\n"); + + print_pareto(out); +} static int perf_c2c__report(int argc, const char **argv) { @@ -1806,6 +1886,9 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__finish(); + use_browser = 0; + perf_c2c__hists_fprintf(stdout); + out_session: perf_session__delete(session); out: From 5a1a99cd2e4e15571a74f65facf05f806d5303fd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Jan 2016 16:59:02 +0100 Subject: [PATCH 36/52] perf c2c report: Add main TUI browser Add the main cachelines TUI browser. 
It allows to navigate through cachelines and display their details and callchains (implemented in the following patches). Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Kim Phillips Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-pk632k4h1uwc5t0lqc7k61zg@git.kernel.org Link: http://lkml.kernel.org/r/20161021001706.GB23970@krava [ Handle file with no entries, fixing segfault reported by Kim Phillips ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 117 ++++++++++++++++++++++++++++++++- tools/perf/ui/browsers/hists.c | 2 +- tools/perf/ui/browsers/hists.h | 1 + 3 files changed, 117 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 33db26c6ca63..34da2a3975b0 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -14,6 +14,7 @@ #include "data.h" #include "sort.h" #include +#include "ui/browsers/hists.h" struct c2c_hists { struct hists hists; @@ -53,6 +54,7 @@ struct perf_c2c { int node_info; bool show_src; + bool use_stdio; }; static struct perf_c2c c2c; @@ -657,6 +659,10 @@ percent_color(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, c2c_he = container_of(he, struct c2c_hist_entry, he); per = get_percent(c2c_he); +#ifdef HAVE_SLANG_SUPPORT + if (use_browser) + return __hpp__slsmg_color_printf(hpp, "%*.2f%%", width - 1, per); +#endif return hpp_color_scnprintf(hpp, "%*.2f%%", width - 1, per); } @@ -1077,6 +1083,8 @@ static struct c2c_dimension dim_dcacheline = { .width = 18, }; +static struct c2c_header header_offset_tui = HEADER_LOW("Off"); + static struct c2c_dimension dim_offset = { .header = HEADER_BOTH("Data address", "Offset"), .name = "offset", @@ -1803,6 +1811,100 @@ static void perf_c2c__hists_fprintf(FILE *out) print_pareto(out); } +#ifdef HAVE_SLANG_SUPPORT +static void c2c_browser__update_nr_entries(struct hist_browser *hb) +{ + u64 nr_entries = 0; + struct rb_node *nd 
= rb_first(&hb->hists->entries); + + while (nd) { + struct hist_entry *he = rb_entry(nd, struct hist_entry, rb_node); + + if (!he->filtered) + nr_entries++; + + nd = rb_next(nd); + } + + hb->nr_non_filtered_entries = nr_entries; +} + +static int perf_c2c_browser__title(struct hist_browser *browser, + char *bf, size_t size) +{ + scnprintf(bf, size, + "Shared Data Cache Line Table " + "(%lu entries)", browser->nr_non_filtered_entries); + return 0; +} + +static struct hist_browser* +perf_c2c_browser__new(struct hists *hists) +{ + struct hist_browser *browser = hist_browser__new(hists); + + if (browser) { + browser->title = perf_c2c_browser__title; + browser->c2c_filter = true; + } + + return browser; +} + +static int perf_c2c__hists_browse(struct hists *hists) +{ + struct hist_browser *browser; + int key = -1; + + browser = perf_c2c_browser__new(hists); + if (browser == NULL) + return -1; + + /* reset abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + + c2c_browser__update_nr_entries(browser); + + while (1) { + key = hist_browser__run(browser, "help"); + + switch (key) { + case 'q': + goto out; + default: + break; + } + } + +out: + hist_browser__delete(browser); + return 0; +} + +static void perf_c2c_display(void) +{ + if (c2c.use_stdio) + perf_c2c__hists_fprintf(stdout); + else + perf_c2c__hists_browse(&c2c.hists.hists); +} +#else +static void perf_c2c_display(void) +{ + use_browser = 0; + perf_c2c__hists_fprintf(stdout); +} +#endif /* HAVE_SLANG_SUPPORT */ + +static void ui_quirks(void) +{ + if (!c2c.use_stdio) { + dim_offset.width = 5; + dim_offset.header = header_offset_tui; + } +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -1819,6 +1921,9 @@ static int perf_c2c__report(int argc, const char **argv) "the input file to process"), OPT_INCR('N', "node-info", &c2c.node_info, "show extra node info in report (repeat for more info)"), +#ifdef HAVE_SLANG_SUPPORT + 
OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), +#endif OPT_END() }; int err = 0; @@ -1828,6 +1933,13 @@ static int perf_c2c__report(int argc, const char **argv) if (argc) usage_with_options(report_c2c_usage, c2c_options); + if (c2c.use_stdio) + use_browser = 0; + else + use_browser = 1; + + setup_browser(false); + if (!input_name || !strlen(input_name)) input_name = "perf.data"; @@ -1886,8 +1998,9 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__finish(); - use_browser = 0; - perf_c2c__hists_fprintf(stdout); + ui_quirks(); + + perf_c2c_display(); out_session: perf_session__delete(session); diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 4ffff7be9299..31d6d5a7c2dc 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -30,7 +30,7 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, static bool hist_browser__has_filter(struct hist_browser *hb) { - return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter; + return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter || hb->c2c_filter; } static int hist_browser__get_folding(struct hist_browser *browser) diff --git a/tools/perf/ui/browsers/hists.h b/tools/perf/ui/browsers/hists.h index 39bd0f28f211..23d6acb84800 100644 --- a/tools/perf/ui/browsers/hists.h +++ b/tools/perf/ui/browsers/hists.h @@ -18,6 +18,7 @@ struct hist_browser { u64 nr_non_filtered_entries; u64 nr_hierarchy_entries; u64 nr_callchain_rows; + bool c2c_filter; /* Get title string. */ int (*title)(struct hist_browser *browser, From f1c5fd4d0bb944da2138338dd361e06ce6c8385e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 May 2016 18:30:44 +0200 Subject: [PATCH 37/52] perf c2c report: Add TUI cacheline browser Adding simple TUI cacheline browser. It triggers when you press 'd' in the main browser on the specific cacheline. 
It allows to navigate through cacheline's offsets and display callchains (implemented in following patches). Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-fovjwgyusv3rz5qxk3hnahtl@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 81 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 34da2a3975b0..1415640c4aca 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1829,6 +1829,84 @@ static void c2c_browser__update_nr_entries(struct hist_browser *hb) hb->nr_non_filtered_entries = nr_entries; } +struct c2c_cacheline_browser { + struct hist_browser hb; + struct hist_entry *he; +}; + +static int +perf_c2c_cacheline_browser__title(struct hist_browser *browser, + char *bf, size_t size) +{ + struct c2c_cacheline_browser *cl_browser; + struct hist_entry *he; + uint64_t addr = 0; + + cl_browser = container_of(browser, struct c2c_cacheline_browser, hb); + he = cl_browser->he; + + if (he->mem_info) + addr = cl_address(he->mem_info->daddr.addr); + + scnprintf(bf, size, "Cacheline 0x%lx", addr); + return 0; +} + +static struct c2c_cacheline_browser* +c2c_cacheline_browser__new(struct hists *hists, struct hist_entry *he) +{ + struct c2c_cacheline_browser *browser; + + browser = zalloc(sizeof(*browser)); + if (browser) { + hist_browser__init(&browser->hb, hists); + browser->hb.c2c_filter = true; + browser->hb.title = perf_c2c_cacheline_browser__title; + browser->he = he; + } + + return browser; +} + +static int perf_c2c__browse_cacheline(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + struct c2c_hists *c2c_hists; + struct c2c_cacheline_browser *cl_browser; + struct hist_browser *browser; + int key = -1; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + c2c_hists = 
c2c_he->hists; + + cl_browser = c2c_cacheline_browser__new(&c2c_hists->hists, he); + if (cl_browser == NULL) + return -1; + + browser = &cl_browser->hb; + + /* reset abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + + c2c_browser__update_nr_entries(browser); + + while (1) { + key = hist_browser__run(browser, "help"); + + switch (key) { + case 'q': + goto out; + default: + break; + } + } + +out: + free(cl_browser); + return 0; +} + static int perf_c2c_browser__title(struct hist_browser *browser, char *bf, size_t size) { @@ -1872,6 +1950,9 @@ static int perf_c2c__hists_browse(struct hists *hists) switch (key) { case 'q': goto out; + case 'd': + perf_c2c__browse_cacheline(browser->he_selection); + break; default: break; } From 74c63a25f28350d298f2e529012a49bf6b5fdb46 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 2 May 2016 20:01:59 +0200 Subject: [PATCH 38/52] perf c2c report: Add global stats stdio output Display global stats table as part of the stdio output or when --stats option is specified: $ perf c2c report --stats ================================================= Trace Event Information ================================================= Total records : 41237 Locked Load/Store Operations : 4075 Load Operations : 20526 Loads - uncacheable : 0 Loads - IO : 0 Loads - Miss : 552 Loads - no mapping : 31 Load Fill Buffer Hit : 7333 Load L1D hit : 6398 Load L2D hit : 144 Load LLC hit : 4889 Load Local HITM : 1185 Load Remote HITM : 838 Load Remote HIT : 52 Load Local DRAM : 183 Load Remote DRAM : 106 Load MESI State Exclusive : 289 Load MESI State Shared : 0 Load LLC Misses : 1179 LLC Misses to Local DRAM : 15.5% LLC Misses to Remote DRAM : 9.0% LLC Misses to Remote cache (HIT) : 4.4% LLC Misses to Remote cache (HITM) : 71.1% Store Operations : 20711 Store - uncacheable : 0 Store - no mapping : 1 Store L1D Hit : 20158 Store L1D Miss : 552 No Page Map Rejects : 7 Unable to parse data source : 0 Original-patch-by: 
Dick Fowles Original-patch-by: Don Zickus Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-qkyvao3qsrnwazf0w1jvsh7z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 56 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 1415640c4aca..232d5f5ac7a2 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -55,6 +55,7 @@ struct perf_c2c { bool show_src; bool use_stdio; + bool stats_only; }; static struct perf_c2c c2c; @@ -1731,6 +1732,51 @@ static int setup_nodes(struct perf_session *session) return 0; } +static void print_c2c__display_stats(FILE *out) +{ + int llc_misses; + struct c2c_stats *stats = &c2c.hists.stats; + + llc_misses = stats->lcl_dram + + stats->rmt_dram + + stats->rmt_hit + + stats->rmt_hitm; + + fprintf(out, "=================================================\n"); + fprintf(out, " Trace Event Information \n"); + fprintf(out, "=================================================\n"); + fprintf(out, " Total records : %10d\n", stats->nr_entries); + fprintf(out, " Locked Load/Store Operations : %10d\n", stats->locks); + fprintf(out, " Load Operations : %10d\n", stats->load); + fprintf(out, " Loads - uncacheable : %10d\n", stats->ld_uncache); + fprintf(out, " Loads - IO : %10d\n", stats->ld_io); + fprintf(out, " Loads - Miss : %10d\n", stats->ld_miss); + fprintf(out, " Loads - no mapping : %10d\n", stats->ld_noadrs); + fprintf(out, " Load Fill Buffer Hit : %10d\n", stats->ld_fbhit); + fprintf(out, " Load L1D hit : %10d\n", stats->ld_l1hit); + fprintf(out, " Load L2D hit : %10d\n", stats->ld_l2hit); + fprintf(out, " Load LLC hit : %10d\n", stats->ld_llchit + stats->lcl_hitm); + fprintf(out, " Load Local HITM : %10d\n", stats->lcl_hitm); + fprintf(out, " Load Remote HITM : %10d\n", 
stats->rmt_hitm); + fprintf(out, " Load Remote HIT : %10d\n", stats->rmt_hit); + fprintf(out, " Load Local DRAM : %10d\n", stats->lcl_dram); + fprintf(out, " Load Remote DRAM : %10d\n", stats->rmt_dram); + fprintf(out, " Load MESI State Exclusive : %10d\n", stats->ld_excl); + fprintf(out, " Load MESI State Shared : %10d\n", stats->ld_shared); + fprintf(out, " Load LLC Misses : %10d\n", llc_misses); + fprintf(out, " LLC Misses to Local DRAM : %10.1f%%\n", ((double)stats->lcl_dram/(double)llc_misses) * 100.); + fprintf(out, " LLC Misses to Remote DRAM : %10.1f%%\n", ((double)stats->rmt_dram/(double)llc_misses) * 100.); + fprintf(out, " LLC Misses to Remote cache (HIT) : %10.1f%%\n", ((double)stats->rmt_hit /(double)llc_misses) * 100.); + fprintf(out, " LLC Misses to Remote cache (HITM) : %10.1f%%\n", ((double)stats->rmt_hitm/(double)llc_misses) * 100.); + fprintf(out, " Store Operations : %10d\n", stats->store); + fprintf(out, " Store - uncacheable : %10d\n", stats->st_uncache); + fprintf(out, " Store - no mapping : %10d\n", stats->st_noadrs); + fprintf(out, " Store L1D Hit : %10d\n", stats->st_l1hit); + fprintf(out, " Store L1D Miss : %10d\n", stats->st_l1miss); + fprintf(out, " No Page Map Rejects : %10d\n", stats->nomap); + fprintf(out, " Unable to parse data source : %10d\n", stats->noparse); +} + static void print_cacheline(struct c2c_hists *c2c_hists, struct hist_entry *he_cl, struct perf_hpp_list *hpp_list, @@ -1794,6 +1840,11 @@ static void perf_c2c__hists_fprintf(FILE *out) { setup_pager(); + print_c2c__display_stats(out); + + if (c2c.stats_only) + return; + fprintf(out, "\n"); fprintf(out, "=================================================\n"); fprintf(out, " Shared Data Cache Line Table \n"); @@ -2005,6 +2056,8 @@ static int perf_c2c__report(int argc, const char **argv) #ifdef HAVE_SLANG_SUPPORT OPT_BOOLEAN(0, "stdio", &c2c.use_stdio, "Use the stdio interface"), #endif + OPT_BOOLEAN(0, "stats", &c2c.stats_only, + "Use the stdio interface"), OPT_END() }; 
int err = 0; @@ -2014,6 +2067,9 @@ static int perf_c2c__report(int argc, const char **argv) if (argc) usage_with_options(report_c2c_usage, c2c_options); + if (c2c.stats_only) + c2c.use_stdio = true; + if (c2c.use_stdio) use_browser = 0; else From 7ef2efaab4adeabaf0d6a71dfa5651edb517b503 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 1 Jul 2016 11:12:11 +0200 Subject: [PATCH 39/52] perf c2c report: Add shared cachelines stats stdio output Display the global shared cachelines related stats table as part of the stdio output or when the --stats option is specified: $ perf c2c report --stats ... ================================================= Global Shared Cache Line Event Information ================================================= Total Shared Cache Lines : 1384 Load HITs on shared lines : 5995 Fill Buffer Hits on shared lines : 1726 L1D hits on shared lines : 1943 L2D hits on shared lines : 0 LLC hits on shared lines : 1360 Locked Access on shared lines : 1993 Store HITs on shared lines : 1504 Store L1D hits on shared lines : 1446 Total Merged records : 3527 Original-patch-by: Dick Fowles Original-patch-by: Don Zickus Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-p0gty8ctbdzisrniwqxhqmhq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 62 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 61 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 232d5f5ac7a2..f7b118b759cf 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -56,6 +56,10 @@ struct perf_c2c { bool show_src; bool use_stdio; bool stats_only; + + /* HITM shared clines stats */ + struct c2c_stats hitm_stats; + int shared_clines; }; static struct perf_c2c c2c; @@ -1732,6 +1736,39 @@ static int setup_nodes(struct perf_session *session) return 0; } +#define HAS_HITMS(__h) 
((__h)->stats.lcl_hitm || (__h)->stats.rmt_hitm) + +static int resort_hitm_cb(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + c2c_he = container_of(he, struct c2c_hist_entry, he); + + if (HAS_HITMS(c2c_he)) { + c2c.shared_clines++; + c2c_add_stats(&c2c.hitm_stats, &c2c_he->stats); + } + + return 0; +} + +static int hists__iterate_cb(struct hists *hists, hists__resort_cb_t cb) +{ + struct rb_node *next = rb_first(&hists->entries); + int ret = 0; + + while (next) { + struct hist_entry *he; + + he = rb_entry(next, struct hist_entry, rb_node); + ret = cb(he); + if (ret) + break; + next = rb_next(&he->rb_node); + } + + return ret; +} + static void print_c2c__display_stats(FILE *out) { int llc_misses; @@ -1777,6 +1814,26 @@ static void print_c2c__display_stats(FILE *out) fprintf(out, " Unable to parse data source : %10d\n", stats->noparse); } +static void print_shared_cacheline_info(FILE *out) +{ + struct c2c_stats *stats = &c2c.hitm_stats; + int hitm_cnt = stats->lcl_hitm + stats->rmt_hitm; + + fprintf(out, "=================================================\n"); + fprintf(out, " Global Shared Cache Line Event Information \n"); + fprintf(out, "=================================================\n"); + fprintf(out, " Total Shared Cache Lines : %10d\n", c2c.shared_clines); + fprintf(out, " Load HITs on shared lines : %10d\n", stats->load); + fprintf(out, " Fill Buffer Hits on shared lines : %10d\n", stats->ld_fbhit); + fprintf(out, " L1D hits on shared lines : %10d\n", stats->ld_l1hit); + fprintf(out, " L2D hits on shared lines : %10d\n", stats->ld_l2hit); + fprintf(out, " LLC hits on shared lines : %10d\n", stats->ld_llchit + stats->lcl_hitm); + fprintf(out, " Locked Access on shared lines : %10d\n", stats->locks); + fprintf(out, " Store HITs on shared lines : %10d\n", stats->store); + fprintf(out, " Store L1D hits on shared lines : %10d\n", stats->st_l1hit); + fprintf(out, " Total Merged records : %10d\n", hitm_cnt + stats->store); +} + static void 
print_cacheline(struct c2c_hists *c2c_hists, struct hist_entry *he_cl, struct perf_hpp_list *hpp_list, @@ -1841,6 +1898,8 @@ static void perf_c2c__hists_fprintf(FILE *out) setup_pager(); print_c2c__display_stats(out); + fprintf(out, "\n"); + print_shared_cacheline_info(out); if (c2c.stats_only) return; @@ -2131,7 +2190,8 @@ static int perf_c2c__report(int argc, const char **argv) ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); hists__collapse_resort(&c2c.hists.hists, NULL); - hists__output_resort_cb(&c2c.hists.hists, &prog, resort_cl_cb); + hists__output_resort_cb(&c2c.hists.hists, &prog, resort_hitm_cb); + hists__iterate_cb(&c2c.hists.hists, resort_cl_cb); ui_progress__finish(); From 2709b97dc2e250b5365629b99da5aa1cca7708f8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 27 Aug 2016 11:40:23 +0200 Subject: [PATCH 40/52] perf c2c report: Add c2c related stats stdio output Display c2c related configuration options/setup. So far it's output of monitored events: $ perf c2c report --stats ... 
================================================= c2c details ================================================= Events : cpu/mem-loads,ldlat=50/pp : cpu/mem-stores/pp Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-ypz84f3a9fumyttrxurm458z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f7b118b759cf..88d88eac53aa 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -13,6 +13,8 @@ #include "tool.h" #include "data.h" #include "sort.h" +#include "evlist.h" +#include "evsel.h" #include #include "ui/browsers/hists.h" @@ -1893,13 +1895,32 @@ static void print_pareto(FILE *out) } } -static void perf_c2c__hists_fprintf(FILE *out) +static void print_c2c_info(FILE *out, struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + bool first = true; + + fprintf(out, "=================================================\n"); + fprintf(out, " c2c details \n"); + fprintf(out, "=================================================\n"); + + evlist__for_each_entry(evlist, evsel) { + fprintf(out, "%-36s: %s\n", first ? 
" Events" : "", + perf_evsel__name(evsel)); + first = false; + } +} + +static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) { setup_pager(); print_c2c__display_stats(out); fprintf(out, "\n"); print_shared_cacheline_info(out); + fprintf(out, "\n"); + print_c2c_info(out, session); if (c2c.stats_only) return; @@ -2073,18 +2094,18 @@ static int perf_c2c__hists_browse(struct hists *hists) return 0; } -static void perf_c2c_display(void) +static void perf_c2c_display(struct perf_session *session) { if (c2c.use_stdio) - perf_c2c__hists_fprintf(stdout); + perf_c2c__hists_fprintf(stdout, session); else perf_c2c__hists_browse(&c2c.hists.hists); } #else -static void perf_c2c_display(void) +static void perf_c2c_display(struct perf_session *session) { use_browser = 0; - perf_c2c__hists_fprintf(stdout); + perf_c2c__hists_fprintf(stdout, session); } #endif /* HAVE_SLANG_SUPPORT */ @@ -2197,7 +2218,7 @@ static int perf_c2c__report(int argc, const char **argv) ui_quirks(); - perf_c2c_display(); + perf_c2c_display(session); out_session: perf_session__delete(session); From dd805768f7d74ba412817811cc60e2b153a43ee8 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 May 2016 18:23:48 +0200 Subject: [PATCH 41/52] perf c2c report: Allow to report callchains Add --call-graph option to properly setup callchain code. Adding default settings to display callchains whenever they are stored in the perf.data. 
Committer Notes: Testing it: [root@jouet ~]# perf c2c record -a -g sleep 5 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 5.331 MB perf.data (4263 samples) ] [root@jouet ~]# perf evlist -v cpu/mem-loads,ldlat=30/P: type: 4, size: 112, config: 0x1cd, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, mmap: 1, comm: 1, freq: 1, task: 1, precise_ip: 3, mmap_data: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1, { bp_addr, config1 }: 0x1f cpu/mem-stores/P: type: 4, size: 112, config: 0x82d0, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|ADDR|CALLCHAIN|ID|CPU|PERIOD|DATA_SRC|WEIGHT, read_format: ID, disabled: 1, inherit: 1, freq: 1, precise_ip: 3, sample_id_all: 1 [root@jouet ~]# perf c2c report --stats ================================================= Trace Event Information ================================================= Total records : 4263 Locked Load/Store Operations : 220 Load Operations : 2130 Loads - uncacheable : 1 Loads - IO : 7 Loads - Miss : 86 Loads - no mapping : 5 Load Fill Buffer Hit : 609 Load L1D hit : 612 ================================================= Trace Event Information ================================================= Total records : 4263 Locked Load/Store Operations : 220 Load Operations : 2130 Loads - uncacheable : 1 Loads - IO : 7 Loads - Miss : 86 Loads - no mapping : 5 Load Fill Buffer Hit : 609 Load L1D hit : 612 Load L2D hit : 27 Load LLC hit : 607 Load Local HITM : 15 Load Remote HITM : 0 Load Remote HIT : 0 Load Local DRAM : 176 Load Remote DRAM : 0 Load MESI State Exclusive : 176 Load MESI State Shared : 0 Load LLC Misses : 176 LLC Misses to Local DRAM : 100.0% LLC Misses to Remote DRAM : 0.0% LLC Misses to Remote cache (HIT) : 0.0% LLC Misses to Remote cache (HITM) : 0.0% Store Operations : 2133 Store - uncacheable : 0 Store - no mapping : 1 Store L1D Hit : 1967 Store L1D Miss : 165 
No Page Map Rejects : 145 Unable to parse data source : 0 ================================================= Global Shared Cache Line Event Information ================================================= Total Shared Cache Lines : 15 Load HITs on shared lines : 26 Fill Buffer Hits on shared lines : 7 L1D hits on shared lines : 3 L2D hits on shared lines : 0 LLC hits on shared lines : 16 Locked Access on shared lines : 2 Store HITs on shared lines : 8 Store L1D hits on shared lines : 7 Total Merged records : 23 ================================================= c2c details ================================================= Events : cpu/mem-loads,ldlat=30/P : cpu/mem-stores/P [root@jouet ~]# [root@jouet ~]# perf c2c report Shared Data Cache Line Table (2378 entries) Total --- LLC Load Hitm -- -- Store Reference - - Load Dram - LLC Total - Core Load Hit - Cacheline records %hitm Total Lcl Rmt Total L1Hit L1Miss Lcl Rmt Ld Miss Loads FB L1 L2 - 0xffff880024380c00 10 0.00% 0 0 0 6 6 0 0 0 0 4 1 3 0 - 0.13% _raw_spin_lock_irqsave - 0.07% ep_poll sys_epoll_wait do_syscall_64 return_from_SYSCALL_64 + 0x103573 - 0.05% ep_poll_callback __wake_up_common - __wake_up_sync_key - 0.02% pipe_read __vfs_read vfs_read sys_read do_syscall_64 return_from_SYSCALL_64 0xfdad + 0.02% sock_def_readable + 0.02% ep_scan_ready_list.constprop.12 + 0.00% mutex_lock + 0.00% __wake_up_common + 0xffff880024380c40 1 0.00% 0 0 0 1 1 0 0 0 0 0 0 0 0 + 0xffff880024380c80 1 0.00% 0 0 0 0 0 0 0 0 0 1 0 0 0 - 0xffff8800243e9f00 1 0.00% 0 0 0 1 1 0 0 0 0 0 0 0 0 enqueue_entity enqueue_task_fair activate_task ttwu_do_activate try_to_wake_up wake_up_process hrtimer_wakeup __hrtimer_run_queues hrtimer_interrupt local_apic_timer_interrupt smp_apic_timer_interrupt apic_timer_interrupt cpuidle_enter call_cpuidle help ------------- And when pressing 'd' to see the cacheline details: Cacheline 0xffff880024380c00 ----- HITM ----- -- Store Refs -- --------- cycles ----- cpu Rmt Lcl L1 Hit L1 Miss Off Pid Tid rmt hitm lcl 
hitm load cnt Symbol - 0.00% 0.00% 100.00% 0.00% 0x0 1473 1474:Chrome_ChildIOT 0 0 41 2 [k] _raw_spin_lock_irqsave [kernel] - _raw_spin_lock_irqsave - 51.52% ep_poll sys_epoll_wait do_syscall_64 return_from_SYSCALL_64 - 0x103573 47.19% 0 4.33% 0xc30bd - 35.93% ep_poll_callback __wake_up_common - __wake_up_sync_key - 18.20% pipe_read __vfs_read vfs_read sys_read do_syscall_64 return_from_SYSCALL_64 0xfdad - 17.73% sock_def_readable unix_stream_sendmsg sock_sendmsg ___sys_sendmsg __sys_sendmsg sys_sendmsg do_syscall_64 return_from_SYSCALL_64 __GI___libc_sendmsg 0x12c036af1fc0 0x16a4050 0x894928ec83485354 + 12.45% ep_scan_ready_list.constprop.12 + 0.00% 0.00% 0.00% 0.00% 0x8 1473 1474:Chrome_ChildIOT 0 0 102 1 [k] mutex_lock [kernel] + 0.00% 0.00% 0.00% 0.00% 0x38 1473 1473:chrome 0 0 88 1 [k] __wake_up_common [kernel] help Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18avr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 67 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 88d88eac53aa..950a4123ea66 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -17,6 +17,7 @@ #include "evsel.h" #include #include "ui/browsers/hists.h" +#include "evlist.h" struct c2c_hists { struct hists hists; @@ -183,6 +184,11 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, return -1; } + ret = sample__resolve_callchain(sample, &callchain_cursor, NULL, + evsel, &al, sysctl_perf_event_max_stack); + if (ret) + goto out; + mi = sample__resolve_mem(sample, &al); if (mi == NULL) return -ENOMEM; @@ -2117,6 +2123,58 @@ static void ui_quirks(void) } } +#define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" + +const char callchain_help[] = "Display call 
graph (stack chain/backtrace):\n\n" + CALLCHAIN_REPORT_HELP + "\n\t\t\t\tDefault: " CALLCHAIN_DEFAULT_OPT; + +static int +parse_callchain_opt(const struct option *opt, const char *arg, int unset) +{ + struct callchain_param *callchain = opt->value; + + callchain->enabled = !unset; + /* + * --no-call-graph + */ + if (unset) { + symbol_conf.use_callchain = false; + callchain->mode = CHAIN_NONE; + return 0; + } + + return parse_callchain_report_opt(arg); +} + +static int setup_callchain(struct perf_evlist *evlist) +{ + u64 sample_type = perf_evlist__combined_sample_type(evlist); + enum perf_call_graph_mode mode = CALLCHAIN_NONE; + + if ((sample_type & PERF_SAMPLE_REGS_USER) && + (sample_type & PERF_SAMPLE_STACK_USER)) + mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + mode = CALLCHAIN_LBR; + else if (sample_type & PERF_SAMPLE_CALLCHAIN) + mode = CALLCHAIN_FP; + + if (!callchain_param.enabled && + callchain_param.mode != CHAIN_NONE && + mode != CALLCHAIN_NONE) { + symbol_conf.use_callchain = true; + if (callchain_register_param(&callchain_param) < 0) { + ui__error("Can't register callchain params.\n"); + return -EINVAL; + } + } + + callchain_param.record_mode = mode; + callchain_param.min_percent = 0; + return 0; +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -2124,6 +2182,7 @@ static int perf_c2c__report(int argc, const char **argv) struct perf_data_file file = { .mode = PERF_DATA_MODE_READ, }; + char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const struct option c2c_options[] = { OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), @@ -2138,6 +2197,10 @@ static int perf_c2c__report(int argc, const char **argv) #endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Use the stdio interface"), + OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, + "print_type,threshold[,print_limit],order,sort_key[,branch],value", + callchain_help, &parse_callchain_opt, + 
callchain_default_opt), OPT_END() }; int err = 0; @@ -2179,6 +2242,10 @@ static int perf_c2c__report(int argc, const char **argv) goto out; } + err = setup_callchain(session->evlist); + if (err) + goto out_session; + if (symbol__init(&session->header.env) < 0) goto out_session; From 9857b7173cf420654a7a78a2cdf972ddb380a8a1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 17 Aug 2016 14:55:23 +0200 Subject: [PATCH 42/52] perf c2c report: Limit the cachelines table entries Add a limit on the number of entries displayed in the cachelines table. By default a cacheline is displayed only if it accounts for at least a 0.0005 fraction (0.05%, DISPLAY_LINE_LIMIT) of all remote HITMs. Also display only cachelines with remote hitm or store data. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18avr@git.kernel.org [ Disabled for now ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 37 ++++++++++++++++++++++++++++++++++++- tools/perf/util/hist.c | 1 + tools/perf/util/hist.h | 1 + 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 950a4123ea66..92b3b8171d4d 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1642,11 +1642,45 @@ static int c2c_hists__reinit(struct c2c_hists *c2c_hists, return hpp_list__parse(&c2c_hists->list, output, sort); } +#define DISPLAY_LINE_LIMIT 0.0005 + +static bool he__display(struct hist_entry *he, struct c2c_stats *stats) +{ + struct c2c_hist_entry *c2c_he; + double ld_dist; + + /* XXX Disabled for now, till we get a command line switch to control this */ + return true; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + if (stats->rmt_hitm) { + ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm); + if (ld_dist < DISPLAY_LINE_LIMIT) + he->filtered = HIST_FILTER__C2C; + } else { + he->filtered = HIST_FILTER__C2C; + } + + return he->filtered == 0; +} + +static inline int 
valid_hitm_or_store(struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + return c2c_he->stats.rmt_hitm || c2c_he->stats.store; +} + static int filter_cb(struct hist_entry *he) { if (c2c.show_src && !he->srcline) he->srcline = hist_entry__get_srcline(he); + if (!valid_hitm_or_store(he)) + he->filtered = HIST_FILTER__C2C; + return 0; } @@ -1654,11 +1688,12 @@ static int resort_cl_cb(struct hist_entry *he) { struct c2c_hist_entry *c2c_he; struct c2c_hists *c2c_hists; + bool display = he__display(he, &c2c.hitm_stats); c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; - if (c2c_hists) { + if (display && c2c_hists) { c2c_hists__reinit(c2c_hists, "percent_rmt_hitm," "percent_lcl_hitm," diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index b02992efb513..e1be4132054d 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -1195,6 +1195,7 @@ static void hist_entry__check_and_remove_filter(struct hist_entry *he, case HIST_FILTER__GUEST: case HIST_FILTER__HOST: case HIST_FILTER__SOCKET: + case HIST_FILTER__C2C: default: return; } diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 9928fed8bc59..d4b6514eeef5 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -22,6 +22,7 @@ enum hist_filter { HIST_FILTER__GUEST, HIST_FILTER__HOST, HIST_FILTER__SOCKET, + HIST_FILTER__C2C, }; enum hist_column { From 55b9577672b27f71843c07b9958129c4548a4090 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 29 May 2016 10:21:45 +0200 Subject: [PATCH 43/52] perf c2c report: Add support to choose local HITMs Currently we sort and limit displayed data based on the remote HITMs count. Adding support to switch to local HITMs via --display option: --display ... 
lcl,rmt Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-inykbom2f19difvsu1e18avr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 116 ++++++++++++++++++++++++++++++++------- 1 file changed, 96 insertions(+), 20 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 92b3b8171d4d..75bcf1406df3 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -63,6 +63,13 @@ struct perf_c2c { /* HITM shared clines stats */ struct c2c_stats hitm_stats; int shared_clines; + + int display; +}; + +enum { + DISPLAY_LCL, + DISPLAY_RMT, }; static struct perf_c2c c2c; @@ -684,15 +691,24 @@ static double percent_hitm(struct c2c_hist_entry *c2c_he) struct c2c_hists *hists; struct c2c_stats *stats; struct c2c_stats *total; - int tot, st; + int tot = 0, st = 0; double p; hists = container_of(c2c_he->he.hists, struct c2c_hists, hists); stats = &c2c_he->stats; total = &hists->stats; - st = stats->rmt_hitm; - tot = total->rmt_hitm; + switch (c2c.display) { + case DISPLAY_RMT: + st = stats->rmt_hitm; + tot = total->rmt_hitm; + break; + case DISPLAY_LCL: + st = stats->lcl_hitm; + tot = total->lcl_hitm; + default: + break; + } p = tot ? 
(double) st / tot : 0; @@ -975,14 +991,26 @@ node_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, ret = scnprintf(hpp->buf, hpp->size, "%2d{%2d ", node, num); advance_hpp(hpp, ret); - - if (c2c_he->stats.rmt_hitm > 0) { - ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", - percent(stats->rmt_hitm, c2c_he->stats.rmt_hitm)); - } else { - ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); + #define DISPLAY_HITM(__h) \ + if (c2c_he->stats.__h> 0) { \ + ret = scnprintf(hpp->buf, hpp->size, "%5.1f%% ", \ + percent(stats->__h, c2c_he->stats.__h));\ + } else { \ + ret = scnprintf(hpp->buf, hpp->size, "%6s ", "n/a"); \ } + switch (c2c.display) { + case DISPLAY_RMT: + DISPLAY_HITM(rmt_hitm); + break; + case DISPLAY_LCL: + DISPLAY_HITM(lcl_hitm); + default: + break; + } + + #undef DISPLAY_HITM + advance_hpp(hpp, ret); if (c2c_he->stats.store > 0) { @@ -1258,8 +1286,12 @@ static struct c2c_dimension dim_tot_loads = { .width = 7, }; +static struct c2c_header percent_hitm_header[] = { + [DISPLAY_LCL] = HEADER_BOTH("Lcl", "Hitm"), + [DISPLAY_RMT] = HEADER_BOTH("Rmt", "Hitm"), +}; + static struct c2c_dimension dim_percent_hitm = { - .header = HEADER_LOW("%hitm"), .name = "percent_hitm", .cmp = percent_hitm_cmp, .entry = percent_hitm_entry, @@ -1654,23 +1686,39 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) c2c_he = container_of(he, struct c2c_hist_entry, he); - if (stats->rmt_hitm) { - ld_dist = ((double)c2c_he->stats.rmt_hitm / stats->rmt_hitm); - if (ld_dist < DISPLAY_LINE_LIMIT) - he->filtered = HIST_FILTER__C2C; - } else { - he->filtered = HIST_FILTER__C2C; +#define FILTER_HITM(__h) \ + if (stats->__h) { \ + ld_dist = ((double)c2c_he->stats.__h / stats->__h); \ + if (ld_dist < DISPLAY_LINE_LIMIT) \ + he->filtered = HIST_FILTER__C2C; \ + } else { \ + he->filtered = HIST_FILTER__C2C; \ } + switch (c2c.display) { + case DISPLAY_LCL: + FILTER_HITM(lcl_hitm); + break; + case DISPLAY_RMT: + FILTER_HITM(rmt_hitm); + default: + break; 
+ }; + +#undef FILTER_HITM + return he->filtered == 0; } static inline int valid_hitm_or_store(struct hist_entry *he) { struct c2c_hist_entry *c2c_he; + bool has_hitm; c2c_he = container_of(he, struct c2c_hist_entry, he); - return c2c_he->stats.rmt_hitm || c2c_he->stats.store; + has_hitm = c2c.display == DISPLAY_LCL ? + c2c_he->stats.lcl_hitm : c2c_he->stats.rmt_hitm; + return has_hitm || c2c_he->stats.store; } static int filter_cb(struct hist_entry *he) @@ -1951,6 +1999,8 @@ static void print_c2c_info(FILE *out, struct perf_session *session) perf_evsel__name(evsel)); first = false; } + fprintf(out, " Cachelines sort on : %s HITMs\n", + c2c.display == DISPLAY_LCL ? "Local" : "Remote"); } static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) @@ -2083,8 +2133,10 @@ static int perf_c2c_browser__title(struct hist_browser *browser, char *bf, size_t size) { scnprintf(bf, size, - "Shared Data Cache Line Table " - "(%lu entries)", browser->nr_non_filtered_entries); + "Shared Data Cache Line Table " + "(%lu entries, sorted on %s HITMs)", + browser->nr_non_filtered_entries, + c2c.display == DISPLAY_LCL ? 
"local" : "remote"); return 0; } @@ -2156,6 +2208,8 @@ static void ui_quirks(void) dim_offset.width = 5; dim_offset.header = header_offset_tui; } + + dim_percent_hitm.header = percent_hitm_header[c2c.display]; } #define CALLCHAIN_DEFAULT_OPT "graph,0.5,caller,function,percent" @@ -2210,6 +2264,22 @@ static int setup_callchain(struct perf_evlist *evlist) return 0; } +static int setup_display(const char *str) +{ + const char *display = str ?: "rmt"; + + if (!strcmp(display, "rmt")) + c2c.display = DISPLAY_RMT; + else if (!strcmp(display, "lcl")) + c2c.display = DISPLAY_LCL; + else { + pr_err("failed: unknown display type: %s\n", str); + return -1; + } + + return 0; +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -2218,6 +2288,7 @@ static int perf_c2c__report(int argc, const char **argv) .mode = PERF_DATA_MODE_READ, }; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; + const char *display = NULL; const struct option c2c_options[] = { OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), @@ -2236,6 +2307,7 @@ static int perf_c2c__report(int argc, const char **argv) "print_type,threshold[,print_limit],order,sort_key[,branch],value", callchain_help, &parse_callchain_opt, callchain_default_opt), + OPT_STRING('d', "display", &display, NULL, "lcl,rmt"), OPT_END() }; int err = 0; @@ -2260,6 +2332,10 @@ static int perf_c2c__report(int argc, const char **argv) file.path = input_name; + err = setup_display(display); + if (err) + goto out; + err = c2c_hists__init(&c2c.hists, "dcacheline", 2); if (err) { pr_debug("Failed to initialize hists\n"); @@ -2307,7 +2383,7 @@ static int perf_c2c__report(int argc, const char **argv) "tot_loads," "ld_fbhit,ld_l1hit,ld_l2hit," "ld_lclhit,ld_rmthit", - "rmt_hitm" + c2c.display == DISPLAY_LCL ? 
"lcl_hitm" : "rmt_hitm" ); ui_progress__init(&prog, c2c.hists.hists.nr_entries, "Sorting..."); From fc9c630e8ed118f3f73a100ccd5b130c7d610748 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 24 May 2016 14:14:38 +0200 Subject: [PATCH 44/52] perf c2c report: Allow to set cacheline sort fields Allowing user to configure the way the single cacheline data are sorted after being sorted by offset. Adding 'c' option to specify sorting fields for single cacheline: -c, --coalesce coalesce fields: pid,tid,iaddr,dso It's allowed to use following combination of fields: pid - process pid tid - process tid iaddr - code address dso - shared object Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-aka8z31umxoq2gqr5mjd81zr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 119 +++++++++++++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 17 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 75bcf1406df3..f66a6f343702 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -46,6 +46,8 @@ struct c2c_hist_entry { struct hist_entry he; }; +static char const *coalesce_default = "pid,tid,iaddr"; + struct perf_c2c { struct perf_tool tool; struct c2c_hists hists; @@ -65,6 +67,11 @@ struct perf_c2c { int shared_clines; int display; + + const char *coalesce; + char *cl_sort; + char *cl_resort; + char *cl_output; }; enum { @@ -239,7 +246,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (!mi_dup) goto free_mi; - c2c_hists = he__get_c2c_hists(he, "offset", 2); + c2c_hists = he__get_c2c_hists(he, c2c.cl_sort, 2); if (!c2c_hists) goto free_mi_dup; @@ -1742,22 +1749,7 @@ static int resort_cl_cb(struct hist_entry *he) c2c_hists = c2c_he->hists; if (display && c2c_hists) { - c2c_hists__reinit(c2c_hists, - "percent_rmt_hitm," - "percent_lcl_hitm," - 
"percent_stores_l1hit," - "percent_stores_l1miss," - "offset," - "pid," - "tid," - "mean_rmt," - "mean_lcl," - "mean_load," - "cpucnt," - "symbol," - "dso," - "node", - "offset,rmt_hitm,lcl_hitm"); + c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); hists__collapse_resort(&c2c_hists->hists, NULL); hists__output_resort_cb(&c2c_hists->hists, NULL, filter_cb); @@ -2001,6 +1993,7 @@ static void print_c2c_info(FILE *out, struct perf_session *session) } fprintf(out, " Cachelines sort on : %s HITMs\n", c2c.display == DISPLAY_LCL ? "Local" : "Remote"); + fprintf(out, " Cacheline data grouping : %s\n", c2c.cl_sort); } static void perf_c2c__hists_fprintf(FILE *out, struct perf_session *session) @@ -2280,6 +2273,89 @@ static int setup_display(const char *str) return 0; } +#define for_each_token(__tok, __buf, __sep, __tmp) \ + for (__tok = strtok_r(__buf, __sep, &__tmp); __tok; \ + __tok = strtok_r(NULL, __sep, &__tmp)) + +static int build_cl_output(char *cl_sort) +{ + char *tok, *tmp, *buf = strdup(cl_sort); + bool add_pid = false; + bool add_tid = false; + bool add_iaddr = false; + bool add_sym = false; + bool add_dso = false; + bool add_src = false; + + if (!buf) + return -ENOMEM; + + for_each_token(tok, buf, ",", tmp) { + if (!strcmp(tok, "tid")) { + add_tid = true; + } else if (!strcmp(tok, "pid")) { + add_pid = true; + } else if (!strcmp(tok, "iaddr")) { + add_iaddr = true; + add_sym = true; + add_dso = true; + add_src = true; + } else if (!strcmp(tok, "dso")) { + add_dso = true; + } else if (strcmp(tok, "offset")) { + pr_err("unrecognized sort token: %s\n", tok); + return -EINVAL; + } + } + + if (asprintf(&c2c.cl_output, + "%s%s%s%s%s%s%s%s%s", + "percent_rmt_hitm," + "percent_lcl_hitm," + "percent_stores_l1hit," + "percent_stores_l1miss," + "offset,", + add_pid ? "pid," : "", + add_tid ? "tid," : "", + add_iaddr ? "iaddr," : "", + "mean_rmt," + "mean_lcl," + "mean_load," + "cpucnt,", + add_sym ? "symbol," : "", + add_dso ? "dso," : "", + add_src ? 
"cl_srcline," : "", + "node") < 0) + return -ENOMEM; + + c2c.show_src = add_src; + + free(buf); + return 0; +} + +static int setup_coalesce(const char *coalesce) +{ + const char *c = coalesce ?: coalesce_default; + + if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0) + return -ENOMEM; + + if (build_cl_output(c2c.cl_sort)) + return -1; + + if (asprintf(&c2c.cl_resort, "offset,%s", + c2c.display == DISPLAY_RMT ? + "rmt_hitm,lcl_hitm" : + "lcl_hitm,rmt_hitm") < 0) + return -ENOMEM; + + pr_debug("coalesce sort fields: %s\n", c2c.cl_sort); + pr_debug("coalesce resort fields: %s\n", c2c.cl_resort); + pr_debug("coalesce output fields: %s\n", c2c.cl_output); + return 0; +} + static int perf_c2c__report(int argc, const char **argv) { struct perf_session *session; @@ -2289,6 +2365,7 @@ static int perf_c2c__report(int argc, const char **argv) }; char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char *display = NULL; + const char *coalesce = NULL; const struct option c2c_options[] = { OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), @@ -2308,6 +2385,8 @@ static int perf_c2c__report(int argc, const char **argv) callchain_help, &parse_callchain_opt, callchain_default_opt), OPT_STRING('d', "display", &display, NULL, "lcl,rmt"), + OPT_STRING('c', "coalesce", &coalesce, "coalesce fields", + "coalesce fields: pid,tid,iaddr,dso"), OPT_END() }; int err = 0; @@ -2336,6 +2415,12 @@ static int perf_c2c__report(int argc, const char **argv) if (err) goto out; + err = setup_coalesce(coalesce); + if (err) { + pr_debug("Failed to initialize hists\n"); + goto out; + } + err = c2c_hists__init(&c2c.hists, "dcacheline", 2); if (err) { pr_debug("Failed to initialize hists\n"); From 25aa84e32df06048d2de5c99fdd4a84fe9a3905f Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 7 Jun 2016 19:02:43 +0200 Subject: [PATCH 45/52] perf c2c report: Recalc width of global sort entries Using resort callbacks to compute the columns' width. 
Computing only the global ones, c2c entries have fixed width only. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-zyayvq2u3dzyf3y7i9jza0lw@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f66a6f343702..579c9e9f73c2 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1728,11 +1728,21 @@ static inline int valid_hitm_or_store(struct hist_entry *he) return has_hitm || c2c_he->stats.store; } +static void calc_width(struct hist_entry *he) +{ + struct c2c_hists *c2c_hists; + + c2c_hists = container_of(he->hists, struct c2c_hists, hists); + hists__calc_col_len(&c2c_hists->hists, he); +} + static int filter_cb(struct hist_entry *he) { if (c2c.show_src && !he->srcline) he->srcline = hist_entry__get_srcline(he); + calc_width(he); + if (!valid_hitm_or_store(he)) he->filtered = HIST_FILTER__C2C; @@ -1748,6 +1758,8 @@ static int resort_cl_cb(struct hist_entry *he) c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; + calc_width(he); + if (display && c2c_hists) { c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); From bb342daed8903af7fa984a1e227f4a44f1b36b88 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Jul 2016 15:40:09 +0200 Subject: [PATCH 46/52] perf c2c report: Add cacheline index entry It's convenient to have an index for each cacheline to help discussions about results over the phone. Add new 'Index' and 'Num' fields in main and single cacheline tables. $ perf c2c report ================================================= Shared Data Cache Line Table ================================================= # # Total Lcl ----- LLC Load Hitm ----- # Index Cacheline records Hitm Total Lcl Rmt ... # ..... .................. ....... ....... ....... ....... 
....... # 0 0xffff880036233b40 1 11.11% 1 1 0 1 0xffff88009ccb2900 1 11.11% 1 1 0 2 0xffff8800b5b3bc40 7 11.11% 1 1 0 ... ================================================= Shared Cache Line Distribution Pareto ================================================= # # ----- HITM ----- -- Store Refs -- Data address # Num Rmt Lcl L1 Hit L1 Miss Offset Pid ... # ..... ....... ....... ....... ....... .................. ....... # ------------------------------------------------------------- 0 0 1 0 0 0xffff880036233b40 ------------------------------------------------------------- 0.00% 100.00% 0.00% 0.00% 0x30 0 ------------------------------------------------------------- 1 0 1 0 0 0xffff88009ccb2900 ------------------------------------------------------------- 0.00% 100.00% 0.00% 0.00% 0x28 549 ... Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-4dhfagaz57tvrfjbg8nd2h4u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 64 ++++++++++++++++++++++++++++++++++++++-- 1 file changed, 61 insertions(+), 3 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 579c9e9f73c2..b3e48e42e825 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -36,6 +36,7 @@ struct c2c_hist_entry { struct c2c_stats stats; unsigned long *cpuset; struct c2c_stats *node_stats; + unsigned int cacheline_idx; struct compute_stats cstats; @@ -1088,6 +1089,29 @@ cpucnt_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); } +static int +cl_idx_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, + struct hist_entry *he) +{ + struct c2c_hist_entry *c2c_he; + int width = c2c_width(fmt, hpp, he->hists); + char buf[10]; + + c2c_he = container_of(he, struct c2c_hist_entry, he); + + scnprintf(buf, 10, 
"%u", c2c_he->cacheline_idx); + return scnprintf(hpp->buf, hpp->size, "%*s", width, buf); +} + +static int +cl_idx_empty_entry(struct perf_hpp_fmt *fmt __maybe_unused, struct perf_hpp *hpp, + struct hist_entry *he) +{ + int width = c2c_width(fmt, hpp, he->hists); + + return scnprintf(hpp->buf, hpp->size, "%*s", width, ""); +} + #define HEADER_LOW(__h) \ { \ .line[1] = { \ @@ -1433,6 +1457,30 @@ static struct c2c_dimension dim_srcline = { .se = &sort_srcline, }; +static struct c2c_dimension dim_dcacheline_idx = { + .header = HEADER_LOW("Index"), + .name = "cl_idx", + .cmp = empty_cmp, + .entry = cl_idx_entry, + .width = 5, +}; + +static struct c2c_dimension dim_dcacheline_num = { + .header = HEADER_LOW("Num"), + .name = "cl_num", + .cmp = empty_cmp, + .entry = cl_idx_entry, + .width = 5, +}; + +static struct c2c_dimension dim_dcacheline_num_empty = { + .header = HEADER_LOW("Num"), + .name = "cl_num_empty", + .cmp = empty_cmp, + .entry = cl_idx_empty_entry, + .width = 5, +}; + static struct c2c_dimension *dimensions[] = { &dim_dcacheline, &dim_offset, @@ -1472,6 +1520,9 @@ static struct c2c_dimension *dimensions[] = { &dim_mean_load, &dim_cpucnt, &dim_srcline, + &dim_dcacheline_idx, + &dim_dcacheline_num, + &dim_dcacheline_num_empty, NULL, }; @@ -1761,6 +1812,10 @@ static int resort_cl_cb(struct hist_entry *he) calc_width(he); if (display && c2c_hists) { + static unsigned int idx; + + c2c_he->cacheline_idx = idx++; + c2c_hists__reinit(c2c_hists, c2c.cl_output, c2c.cl_resort); hists__collapse_resort(&c2c_hists->hists, NULL); @@ -1948,10 +2003,10 @@ static void print_cacheline(struct c2c_hists *c2c_hists, fprintf(out, "\n"); } - fprintf(out, " ------------------------------------------------------\n"); + fprintf(out, " -------------------------------------------------------------\n"); __hist_entry__snprintf(he_cl, &hpp, hpp_list); fprintf(out, "%s\n", bf); - fprintf(out, " ------------------------------------------------------\n"); + fprintf(out, " 
-------------------------------------------------------------\n"); hists__fprintf(&c2c_hists->hists, false, 0, 0, 0, out, true); } @@ -1964,6 +2019,7 @@ static void print_pareto(FILE *out) perf_hpp_list__init(&hpp_list); ret = hpp_list__parse(&hpp_list, + "cl_num," "cl_rmt_hitm," "cl_lcl_hitm," "cl_stores_l1hit," @@ -2321,7 +2377,8 @@ static int build_cl_output(char *cl_sort) } if (asprintf(&c2c.cl_output, - "%s%s%s%s%s%s%s%s%s", + "%s%s%s%s%s%s%s%s%s%s", + c2c.use_stdio ? "cl_num_empty," : "", "percent_rmt_hitm," "percent_lcl_hitm," "percent_stores_l1hit," @@ -2470,6 +2527,7 @@ static int perf_c2c__report(int argc, const char **argv) } c2c_hists__reinit(&c2c.hists, + "cl_idx," "dcacheline," "tot_recs," "percent_hitm," From 590b6a3ac5133e3fff9ac6f44af0dc0f3eb7c397 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 10 Jul 2016 16:25:15 +0200 Subject: [PATCH 47/52] perf c2c report: Add support to manage symbol name length The width of symbol and source line entries could get really long and not convenient to display. Adding support to display only part of such strings and possibility to switch to full length by using --full-symbols option or 's' key in TUI browser.
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-yxf5hfteyfaoi8xrgczqtyha@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index b3e48e42e825..4645461bd997 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -62,6 +62,7 @@ struct perf_c2c { bool show_src; bool use_stdio; bool stats_only; + bool symbol_full; /* HITM shared clines stats */ struct c2c_stats hitm_stats; @@ -336,6 +337,21 @@ struct c2c_fmt { struct c2c_dimension *dim; }; +#define SYMBOL_WIDTH 30 + +static struct c2c_dimension dim_symbol; +static struct c2c_dimension dim_srcline; + +static int symbol_width(struct hists *hists, struct sort_entry *se) +{ + int width = hists__col_len(hists, se->se_width_idx); + + if (!c2c.symbol_full) + width = MIN(width, SYMBOL_WIDTH); + + return width; +} + static int c2c_width(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp __maybe_unused, struct hists *hists __maybe_unused) @@ -346,6 +362,9 @@ static int c2c_width(struct perf_hpp_fmt *fmt, c2c_fmt = container_of(fmt, struct c2c_fmt, fmt); dim = c2c_fmt->dim; + if (dim == &dim_symbol || dim == &dim_srcline) + return symbol_width(hists, dim->se); + return dim->se ? 
hists__col_len(hists, dim->se->se_width_idx) : c2c_fmt->dim->width; } @@ -1563,9 +1582,13 @@ static int c2c_se_entry(struct perf_hpp_fmt *fmt, struct perf_hpp *hpp, struct c2c_dimension *dim = c2c_fmt->dim; size_t len = fmt->user_len; - if (!len) + if (!len) { len = hists__col_len(he->hists, dim->se->se_width_idx); + if (dim == &dim_symbol || dim == &dim_srcline) + len = symbol_width(he->hists, dim->se); + } + return dim->se->se_snprintf(he, hpp->buf, hpp->size, len); } @@ -2159,6 +2182,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct hist_browser *browser; int key = -1; + /* Display compact version first. */ + c2c.symbol_full = false; + c2c_he = container_of(he, struct c2c_hist_entry, he); c2c_hists = c2c_he->hists; @@ -2178,6 +2204,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) key = hist_browser__run(browser, "help"); switch (key) { + case 's': + c2c.symbol_full = !c2c.symbol_full; + break; case 'q': goto out; default: @@ -2449,6 +2478,8 @@ static int perf_c2c__report(int argc, const char **argv) #endif OPT_BOOLEAN(0, "stats", &c2c.stats_only, "Use the stdio interface"), + OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, + "Display full length of symbols"), OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, "print_type,threshold[,print_limit],order,sort_key[,branch],value", callchain_help, &parse_callchain_opt, From 1a56a42534da8f1d2faf8df99787833ce67e8fdb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 10 Jul 2016 16:30:27 +0200 Subject: [PATCH 48/52] perf c2c report: Iterate node display in browser Adding TUI support to switch between Node entry versions in real time with 'n' key. 
Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-xqbw4h4dxig54wff7fd14lao@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4645461bd997..205939527945 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2207,6 +2207,10 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) case 's': c2c.symbol_full = !c2c.symbol_full; break; + case 'n': + c2c.node_info = (c2c.node_info + 1) % 3; + setup_nodes_header(); + break; case 'q': goto out; default: From 9a406eb610e3676611ce3e32d2e6c55ccc7e5d61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 17 Aug 2016 15:54:58 +0200 Subject: [PATCH 49/52] perf c2c report: Add help windows Adding help windows to display key/action mappings for both browsers. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-zni4apopx6a9eyxsosm1ebh1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 205939527945..4af66835bcb7 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2181,6 +2181,11 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct c2c_cacheline_browser *cl_browser; struct hist_browser *browser; int key = -1; + const char help[] = + " ENTER Togle callchains (if present) \n" + " n Togle Node details info \n" + " s Togle full lenght of symbol and source line columns \n" + " q Return back to cacheline list \n"; /* Display compact version first. 
*/ c2c.symbol_full = false; @@ -2201,7 +2206,7 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "help"); + key = hist_browser__run(browser, "? - help"); switch (key) { case 's': @@ -2213,6 +2218,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) break; case 'q': goto out; + case '?': + ui_browser__help_window(&browser->b, help); + break; default: break; } @@ -2251,6 +2259,10 @@ static int perf_c2c__hists_browse(struct hists *hists) { struct hist_browser *browser; int key = -1; + const char help[] = + " d Display cacheline details \n" + " ENTER Togle callchains (if present) \n" + " q Quit \n"; browser = perf_c2c_browser__new(hists); if (browser == NULL) @@ -2263,7 +2275,7 @@ static int perf_c2c__hists_browse(struct hists *hists) c2c_browser__update_nr_entries(browser); while (1) { - key = hist_browser__run(browser, "help"); + key = hist_browser__run(browser, "? - help"); switch (key) { case 'q': @@ -2271,6 +2283,9 @@ static int perf_c2c__hists_browse(struct hists *hists) case 'd': perf_c2c__browse_cacheline(browser->he_selection); break; + case '?': + ui_browser__help_window(&browser->b, help); + break; default: break; } From 465f27a3b2a21cdd1561537c8f2cf293b1d77da4 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 26 Aug 2016 10:36:12 +0200 Subject: [PATCH 50/52] perf c2c: Add man page and credits Add man page for c2c command and credits to builtin-c2c.c file. 
Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-twbp391v8v9f5idp584hlfov@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 276 ++++++++++++++++++++++++++ tools/perf/builtin-c2c.c | 11 + 2 files changed, 287 insertions(+) create mode 100644 tools/perf/Documentation/perf-c2c.txt diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt new file mode 100644 index 000000000000..ba2f4de399c3 --- /dev/null +++ b/tools/perf/Documentation/perf-c2c.txt @@ -0,0 +1,276 @@ +perf-c2c(1) +=========== + +NAME +---- +perf-c2c - Shared Data C2C/HITM Analyzer. + +SYNOPSIS +-------- +[verse] +'perf c2c record' [<options>] <command> +'perf c2c record' [<options>] -- [<record command options>] <command> +'perf c2c report' [<options>] + +DESCRIPTION +----------- +C2C stands for Cache To Cache. + +The perf c2c tool provides means for Shared Data C2C/HITM analysis. It allows +you to track down the cacheline contentions. + +The tool is based on x86's load latency and precise store facility events +provided by Intel CPUs. These events provide: + - memory address of the access + - type of the access (load and store details) + - latency (in cycles) of the load access + +The c2c tool provides means to record this data and report back access details +for cachelines with highest contention - highest number of HITM accesses. + +The basic workflow with this tool follows the standard record/report phase. +User uses the record command to record events data and report command to +display it. + + +RECORD OPTIONS +-------------- +-e:: +--event=:: + Select the PMU event. Use 'perf mem record -e list' + to list available events. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-l:: +--ldlat:: + Configure mem-loads latency. + +-k:: +--all-kernel:: + Configure all used events to run in kernel space.
+ +-u:: +--all-user:: + Configure all used events to run in user space. + +REPORT OPTIONS +-------------- +-k:: +--vmlinux=:: + vmlinux pathname + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +-i:: +--input:: + Specify the input file to process. + +-N:: +--node-info:: + Show extra node info in report (see NODE INFO section) + +-c:: +--coalesce:: + Specify sorting fields for single cacheline display. + Following fields are available: tid,pid,iaddr,dso + (see COALESCE) + +-g:: +--call-graph:: + Setup callchains parameters. + Please refer to perf-report man page for details. + +--stdio:: + Force the stdio output (see STDIO OUTPUT) + +--stats:: + Display only statistic tables and force stdio mode. + +--full-symbols:: + Display full length of symbols. + +C2C RECORD +---------- +The perf c2c record command sets up options related to HITM cacheline analysis +and calls standard perf record command. + +Following perf record options are configured by default: +(check perf record man page for details) + + -W,-d,--sample-cpu + +Unless specified otherwise with '-e' option, following events are monitored by +default: + + cpu/mem-loads,ldlat=30/P + cpu/mem-stores/P + +User can pass any 'perf record' option behind '--' mark, like (to enable +callchains and system wide monitoring): + + $ perf c2c record -- -g -a + +Please check RECORD OPTIONS section for specific c2c record options. + +C2C REPORT +---------- +The perf c2c report command displays shared data analysis. It comes in two +display modes: stdio and tui (default).
+ +The report command workflow is following: + - sort all the data based on the cacheline address + - store access details for each cacheline + - sort all cachelines based on user settings + - display data + +In general perf report output consists of 2 basic views: + 1) most expensive cachelines list + 2) offsets details for each cacheline + +For each cacheline in the 1) list we display following data: +(Both stdio and TUI modes follow the same fields output) + + Index + - zero based index to identify the cacheline + + Cacheline + - cacheline address (hex number) + + Total records + - sum of all cachelines accesses + + Rmt/Lcl Hitm + - cacheline percentage of all Remote/Local HITM accesses + + LLC Load Hitm - Total, Lcl, Rmt + - count of Total/Local/Remote load HITMs + + Store Reference - Total, L1Hit, L1Miss + Total - all store accesses + L1Hit - store accesses that hit L1 + L1Miss - store accesses that missed L1 + + Load Dram + - count of local and remote DRAM accesses + + LLC Ld Miss + - count of all accesses that missed LLC + + Total Loads + - sum of all load accesses + + Core Load Hit - FB, L1, L2 + - count of load hits in FB (Fill Buffer), L1 and L2 cache + + LLC Load Hit - Llc, Rmt + - count of LLC and Remote load hits + +For each offset in the 2) list we display following data: + + HITM - Rmt, Lcl + - % of Remote/Local HITM accesses for given offset within cacheline + + Store Refs - L1 Hit, L1 Miss + - % of store accesses that hit/missed L1 for given offset within cacheline + + Data address - Offset + - offset address + + Pid + - pid of the process responsible for the accesses + + Tid + - tid of the process responsible for the accesses + + Code address + - code address responsible for the accesses + + cycles - rmt hitm, lcl hitm, load + - sum of cycles for given accesses - Remote/Local HITM and generic load + + cpu cnt + - number of cpus that participated on the access + + Symbol + - code symbol related to the 'Code address' value + + Shared Object + - shared
object name related to the 'Code address' value + + Source:Line + - source information related to the 'Code address' value + + Node + - nodes participating on the access (see NODE INFO section) + +NODE INFO +--------- +The 'Node' field displays nodes that access given cacheline +offset. Its output comes in 3 flavors: + - node IDs separated by ',' + - node IDs with stats for each ID, in following format: + Node{cpus %hitms %stores} + - node IDs with list of affected CPUs in following format: + Node{cpu list} + +User can switch between above flavors with -N option or +use 'n' key to interactively switch in TUI mode. + +COALESCE +-------- +User can specify how to sort offsets for cacheline. + +Following fields are available and govern the final +output fields set for cacheline offsets output: + + tid - coalesced by process TIDs + pid - coalesced by process PIDs + iaddr - coalesced by code address, following fields are displayed: + Code address, Code symbol, Shared Object, Source line + dso - coalesced by shared object + +By default the coalescing is set up with 'pid,tid,iaddr'. + +STDIO OUTPUT +------------ +The stdio output displays data on standard output. + +Following tables are displayed: + Trace Event Information + - overall statistics of memory accesses + + Global Shared Cache Line Event Information + - overall statistics on shared cachelines + + Shared Data Cache Line Table + - list of most expensive cachelines + + Shared Cache Line Distribution Pareto + - list of all accessed offsets for each cacheline + +TUI OUTPUT +---------- +The TUI output provides interactive interface to navigate +through cachelines list and to display offset details. + +For details please refer to the help window by pressing '?' key. + +CREDITS +------- +Although Don Zickus, Dick Fowles and Joe Mario worked together +to get this implemented, we got lots of early help from Arnaldo +Carvalho de Melo, Stephane Eranian, Jiri Olsa and Andi Kleen.
+ +C2C BLOG +-------- +Check Joe's blog on c2c tool for detailed use case explanation: + https://joemario.github.io/blog/2016/09/01/c2c-blog/ + +SEE ALSO +-------- +linkperf:perf-record[1], linkperf:perf-mem[1] diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 4af66835bcb7..32c9e62be5a2 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -1,3 +1,14 @@ +/* + * This is rewrite of original c2c tool introduced in here: + * http://lwn.net/Articles/588866/ + * + * The original tool was changed to fit in current perf state. + * + * Original authors: + * Don Zickus + * Dick Fowles + * Joe Mario + */ #include #include #include From 18f278d2dd68fdccf8810e78d10d6b75a6f2f67a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 11 Oct 2016 13:39:47 +0200 Subject: [PATCH 51/52] perf c2c report: Add --no-source option Add a possibility to disable source line column with new --no-source option. The source line data could take a lot of time to retrieve, so it could be a performance burden for big data. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-8p6s2727fq8nbsm3it5gix3p@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 3 +++ tools/perf/builtin-c2c.c | 13 ++++++++----- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index ba2f4de399c3..33ed4564a8c0 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -94,6 +94,9 @@ REPORT OPTIONS --full-symbols:: Display full length of symbols. +--no-source:: + Do not display Source:Line column.
+ C2C RECORD ---------- The perf c2c record command setup options related to HITM cacheline analysis diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 32c9e62be5a2..7eb418d3af04 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2404,7 +2404,7 @@ static int setup_display(const char *str) for (__tok = strtok_r(__buf, __sep, &__tmp); __tok; \ __tok = strtok_r(NULL, __sep, &__tmp)) -static int build_cl_output(char *cl_sort) +static int build_cl_output(char *cl_sort, bool no_source) { char *tok, *tmp, *buf = strdup(cl_sort); bool add_pid = false; @@ -2426,7 +2426,7 @@ static int build_cl_output(char *cl_sort) add_iaddr = true; add_sym = true; add_dso = true; - add_src = true; + add_src = no_source ? false : true; } else if (!strcmp(tok, "dso")) { add_dso = true; } else if (strcmp(tok, "offset")) { @@ -2462,14 +2462,14 @@ static int build_cl_output(char *cl_sort) return 0; } -static int setup_coalesce(const char *coalesce) +static int setup_coalesce(const char *coalesce, bool no_source) { const char *c = coalesce ?: coalesce_default; if (asprintf(&c2c.cl_sort, "offset,%s", c) < 0) return -ENOMEM; - if (build_cl_output(c2c.cl_sort)) + if (build_cl_output(c2c.cl_sort, no_source)) return -1; if (asprintf(&c2c.cl_resort, "offset,%s", @@ -2494,6 +2494,7 @@ static int perf_c2c__report(int argc, const char **argv) char callchain_default_opt[] = CALLCHAIN_DEFAULT_OPT; const char *display = NULL; const char *coalesce = NULL; + bool no_source = false; const struct option c2c_options[] = { OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), @@ -2510,6 +2511,8 @@ static int perf_c2c__report(int argc, const char **argv) "Use the stdio interface"), OPT_BOOLEAN(0, "full-symbols", &c2c.symbol_full, "Display full length of symbols"), + OPT_BOOLEAN(0, "no-source", &no_source, + "Do not display Source Line column"), OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, 
"print_type,threshold[,print_limit],order,sort_key[,branch],value", callchain_help, &parse_callchain_opt, @@ -2545,7 +2548,7 @@ static int perf_c2c__report(int argc, const char **argv) if (err) goto out; - err = setup_coalesce(coalesce); + err = setup_coalesce(coalesce, no_source); if (err) { pr_debug("Failed to initialize hists\n"); goto out; From af09b2d35e18f1a377aaa2bc4e5ba4abb98a1088 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 11 Oct 2016 13:52:05 +0200 Subject: [PATCH 52/52] perf c2c report: Add --show-all option Normally we limit the main list to contain only entries with HITM % value > 0.0005, but it might be useful to display all captured entries. Adding --show-all option for that. Requested-and-Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Don Zickus Cc: Joe Mario Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-nokgjdwikbegec5jzj4mxhqc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-c2c.txt | 3 +++ tools/perf/builtin-c2c.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/tools/perf/Documentation/perf-c2c.txt b/tools/perf/Documentation/perf-c2c.txt index 33ed4564a8c0..21810d711f5f 100644 --- a/tools/perf/Documentation/perf-c2c.txt +++ b/tools/perf/Documentation/perf-c2c.txt @@ -97,6 +97,9 @@ REPORT OPTIONS --no-source:: Do not display Source:Line column. +--show-all:: + Show all captured HITM lines, with no regard to HITM % 0.0005 limit. 
+ C2C RECORD ---------- The perf c2c record command setup options related to HITM cacheline analysis diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 7eb418d3af04..c6d0dda594d9 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -71,6 +71,7 @@ struct perf_c2c { int node_info; bool show_src; + bool show_all; bool use_stdio; bool stats_only; bool symbol_full; @@ -1773,8 +1774,8 @@ static bool he__display(struct hist_entry *he, struct c2c_stats *stats) struct c2c_hist_entry *c2c_he; double ld_dist; - /* XXX Disabled for now, till we get a command line switch to control this */ - return true; + if (c2c.show_all) + return true; c2c_he = container_of(he, struct c2c_hist_entry, he); @@ -2513,6 +2514,8 @@ static int perf_c2c__report(int argc, const char **argv) "Display full length of symbols"), OPT_BOOLEAN(0, "no-source", &no_source, "Do not display Source Line column"), + OPT_BOOLEAN(0, "show-all", &c2c.show_all, + "Show all captured HITM lines."), OPT_CALLBACK_DEFAULT('g', "call-graph", &callchain_param, "print_type,threshold[,print_limit],order,sort_key[,branch],value", callchain_help, &parse_callchain_opt,