perf evsel: Don't set exclude_guest by default
Perf tool sets exclude_guest by default while calling perf_event_open().
Because IBS does not have filtering capability, such events always get rejected
by the IBS PMU driver and thus perf falls back to non-precise sampling. Fix
it by not setting exclude_guest by default on AMD.
Before:
$ sudo ./perf record -C 0 -vvv true |& grep precise
precise_ip 3
decreasing precise_ip by one (2)
precise_ip 2
decreasing precise_ip by one (1)
precise_ip 1
decreasing precise_ip by one (0)
After:
$ sudo ./perf record -C 0 -vvv true |& grep precise
precise_ip 3
decreasing precise_ip by one (2)
precise_ip 2
Committer notes:
Fixup init to zero for perf_env in older compilers:
arch/x86/util/evsel.c:15:26: error: missing field 'os_release' initializer [-Werror,-Wmissing-field-initializers]
struct perf_env env = {0};
^
Committer notes:
Namhyung remarked:
It'd be nice if it can cover explicit "-e cycles:pp" as well.
Ravi clarified:
For the explicit :pp modifier, evsel->precise_max does not get set and thus perf
does not try different attr->precise_ip values while exclude_guest is set.
So no issue with explicit :pp:
$ sudo ./perf record -C 0 -e cycles:pp -vvv |& grep "precise_ip\|exclude_guest"
precise_ip 2
exclude_guest 1
precise_ip 2
exclude_guest 1
switching off exclude_guest, exclude_host
precise_ip 2
^C
Also, with :P modifier, evsel->precise_max gets set but exclude_guest does
not and thus :P also works fine:
$ sudo ./perf record -C 0 -e cycles:P -vvv |& grep "precise_ip\|exclude_guest"
precise_ip 3
decreasing precise_ip by one (2)
precise_ip 2
^C
Reported-by: Kim Phillips <kim.phillips@amd.com>
Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
Acked-by: Namhyung Kim <namhyung@kernel.org>
Tested-by: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ian Rogers <irogers@google.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Stephane Eranian <eranian@google.com>
Link: http://lore.kernel.org/lkml/20211103072112.32312-1-ravi.bangoria@amd.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
This commit is contained in:
committed by
Arnaldo Carvalho de Melo
parent
3500eeebed
commit
eb39bf3256
@@ -1,8 +1,31 @@
|
|||||||
// SPDX-License-Identifier: GPL-2.0
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include "util/evsel.h"
|
#include "util/evsel.h"
|
||||||
|
#include "util/env.h"
|
||||||
|
#include "linux/string.h"
|
||||||
|
|
||||||
void arch_evsel__set_sample_weight(struct evsel *evsel)
|
void arch_evsel__set_sample_weight(struct evsel *evsel)
|
||||||
{
|
{
|
||||||
evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
|
evsel__set_sample_bit(evsel, WEIGHT_STRUCT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr)
|
||||||
|
{
|
||||||
|
struct perf_env env = { .total_mem = 0, } ;
|
||||||
|
|
||||||
|
if (!perf_env__cpuid(&env))
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* On AMD, precise cycles event sampling internally uses IBS pmu.
|
||||||
|
* But IBS does not have filtering capabilities and perf by default
|
||||||
|
* sets exclude_guest = 1. This makes IBS pmu event init fail and
|
||||||
|
* thus perf ends up doing non-precise sampling. Avoid it by clearing
|
||||||
|
* exclude_guest.
|
||||||
|
*/
|
||||||
|
if (env.cpuid && strstarts(env.cpuid, "AuthenticAMD"))
|
||||||
|
attr->exclude_guest = 0;
|
||||||
|
|
||||||
|
free(env.cpuid);
|
||||||
|
}
|
||||||
|
|||||||
@@ -294,7 +294,7 @@ static bool perf_event_can_profile_kernel(void)
|
|||||||
return perf_event_paranoid_check(1);
|
return perf_event_paranoid_check(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config)
|
struct evsel *evsel__new_cycles(bool precise __maybe_unused, __u32 type, __u64 config)
|
||||||
{
|
{
|
||||||
struct perf_event_attr attr = {
|
struct perf_event_attr attr = {
|
||||||
.type = type,
|
.type = type,
|
||||||
@@ -305,18 +305,16 @@ struct evsel *evsel__new_cycles(bool precise, __u32 type, __u64 config)
|
|||||||
|
|
||||||
event_attr_init(&attr);
|
event_attr_init(&attr);
|
||||||
|
|
||||||
if (!precise)
|
|
||||||
goto new_event;
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now let the usual logic to set up the perf_event_attr defaults
|
* Now let the usual logic to set up the perf_event_attr defaults
|
||||||
* to kick in when we return and before perf_evsel__open() is called.
|
* to kick in when we return and before perf_evsel__open() is called.
|
||||||
*/
|
*/
|
||||||
new_event:
|
|
||||||
evsel = evsel__new(&attr);
|
evsel = evsel__new(&attr);
|
||||||
if (evsel == NULL)
|
if (evsel == NULL)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
|
arch_evsel__fixup_new_cycles(&evsel->core.attr);
|
||||||
|
|
||||||
evsel->precise_max = true;
|
evsel->precise_max = true;
|
||||||
|
|
||||||
/* use asprintf() because free(evsel) assumes name is allocated */
|
/* use asprintf() because free(evsel) assumes name is allocated */
|
||||||
@@ -1063,6 +1061,10 @@ void __weak arch_evsel__set_sample_weight(struct evsel *evsel)
|
|||||||
evsel__set_sample_bit(evsel, WEIGHT);
|
evsel__set_sample_bit(evsel, WEIGHT);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void __weak arch_evsel__fixup_new_cycles(struct perf_event_attr *attr __maybe_unused)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The enable_on_exec/disabled value strategy:
|
* The enable_on_exec/disabled value strategy:
|
||||||
*
|
*
|
||||||
|
|||||||
@@ -283,6 +283,7 @@ void __evsel__reset_sample_bit(struct evsel *evsel, enum perf_event_sample_forma
|
|||||||
void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
|
void evsel__set_sample_id(struct evsel *evsel, bool use_sample_identifier);
|
||||||
|
|
||||||
void arch_evsel__set_sample_weight(struct evsel *evsel);
|
void arch_evsel__set_sample_weight(struct evsel *evsel);
|
||||||
|
void arch_evsel__fixup_new_cycles(struct perf_event_attr *attr);
|
||||||
|
|
||||||
int evsel__set_filter(struct evsel *evsel, const char *filter);
|
int evsel__set_filter(struct evsel *evsel, const char *filter);
|
||||||
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
|
int evsel__append_tp_filter(struct evsel *evsel, const char *filter);
|
||||||
|
|||||||
Reference in New Issue
Block a user