Merge tag 'perf-tools-fixes-for-v5.9-2020-09-01' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux
Pull perf tools fixes from Arnaldo Carvalho de Melo:
- Fix infinite loop in the TUI for grouped events in 'perf top/record',
eg when using "perf top -e '{cycles,instructions,cache-misses}'".
- Fix segfault by skipping side-band event setup if HAVE_LIBBPF_SUPPORT
is not set.
- Fix synthesized branch stacks generated from CoreSight ETM trace and
Intel PT hardware traces.
- Fix error when synthesizing events from ARM SPE hardware trace.
- The SNOOPX and REMOTE offsets in the data_src bitmask in perf records
were were both 37, SNOOPX is 38, fix it.
- Fix use of CPU list with summary option in 'perf sched timehist'.
- Avoid an uninitialized read when using fake PMUs.
- Set perf_event_attr.exclude_guest=1 for user-space counting.
- Don't order events when doing a 'perf report -D' raw dump of
perf.data records.
- Set NULL sentinel in pmu_events table in "Parse and process metrics"
'perf test'
- Fix basic bpf filtering 'perf test' on s390x.
- Fix out of bounds array access in the 'perf stat' print_counters()
evlist method.
- Add mwait_idle_with_hints.constprop.0 to the list of idle symbols.
- Use %zd for size_t printf formats on 32-bit.
- Correct the help info of "perf record --no-bpf-event" option.
- Add entries for CoreSight and Arm SPE tooling to MAINTAINERS.
* tag 'perf-tools-fixes-for-v5.9-2020-09-01' of git://git.kernel.org/pub/scm/linux/kernel/git/acme/linux:
perf report: Disable ordered_events for raw dump
perf tools: Correct SNOOPX field offset
perf intel-pt: Fix corrupt data after perf inject from
perf cs-etm: Fix corrupt data after perf inject from
perf top/report: Fix infinite loop in the TUI for grouped events
perf parse-events: Avoid an uninitialized read when using fake PMUs
perf stat: Fix out of bounds array access in the print_counters() evlist method
perf test: Set NULL sentinel in pmu_events table in "Parse and process metrics" test
perf parse-events: Set exclude_guest=1 for user-space counting
perf record: Correct the help info of option "--no-bpf-event"
perf tools: Use %zd for size_t printf formats on 32-bit
MAINTAINERS: Add entries for CoreSight and Arm SPE tooling
perf: arm-spe: Fix check error when synthesizing events
perf symbols: Add mwait_idle_with_hints.constprop.0 to the list of idle symbols
perf top: Skip side-band event setup if HAVE_LIBBPF_SUPPORT is not set
perf sched timehist: Fix use of CPU list with summary option
perf test: Fix basic bpf filtering test
diff --git a/MAINTAINERS b/MAINTAINERS
index 7119165..592467b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13571,12 +13571,18 @@
F: tools/lib/perf/
F: tools/perf/
-PERFORMANCE EVENTS SUBSYSTEM ARM64 PMU EVENTS
+PERFORMANCE EVENTS TOOLING ARM64
R: John Garry <john.garry@huawei.com>
R: Will Deacon <will@kernel.org>
+R: Mathieu Poirier <mathieu.poirier@linaro.org>
+R: Leo Yan <leo.yan@linaro.org>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Supported
+F: tools/build/feature/test-libopencsd.c
+F: tools/perf/arch/arm*/
F: tools/perf/pmu-events/arch/arm64/
+F: tools/perf/util/arm-spe*
+F: tools/perf/util/cs-etm*
PERSONALITY HANDLING
M: Christoph Hellwig <hch@infradead.org>
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index 077e7ee..3e5dcdd 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -1196,7 +1196,7 @@
#define PERF_MEM_SNOOPX_FWD 0x01 /* forward */
/* 1 free */
-#define PERF_MEM_SNOOPX_SHIFT 37
+#define PERF_MEM_SNOOPX_SHIFT 38
/* locked instruction */
#define PERF_MEM_LOCK_NA 0x01 /* not available */
diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index f91352f..772f105 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -2452,7 +2452,7 @@
OPT_BOOLEAN(0, "tail-synthesize", &record.opts.tail_synthesize,
"synthesize non-sample events at the end of output"),
OPT_BOOLEAN(0, "overwrite", &record.opts.overwrite, "use overwrite mode"),
- OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "record bpf events"),
+ OPT_BOOLEAN(0, "no-bpf-event", &record.opts.no_bpf_event, "do not record bpf events"),
OPT_BOOLEAN(0, "strict-freq", &record.opts.strict_freq,
"Fail if the specified frequency can't be used"),
OPT_CALLBACK('F', "freq", &record.opts, "freq or 'max'",
diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c
index ece1cdd..3c74c9c 100644
--- a/tools/perf/builtin-report.c
+++ b/tools/perf/builtin-report.c
@@ -1332,6 +1332,9 @@
if (report.mmaps_mode)
report.tasks_mode = true;
+ if (dump_trace)
+ report.tool.ordered_events = false;
+
if (quiet)
perf_quiet_option();
diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c
index 0c7d599..e6fc297 100644
--- a/tools/perf/builtin-sched.c
+++ b/tools/perf/builtin-sched.c
@@ -2584,7 +2584,8 @@
}
if (!sched->idle_hist || thread->tid == 0) {
- timehist_update_runtime_stats(tr, t, tprev);
+ if (!cpu_list || test_bit(sample->cpu, cpu_bitmap))
+ timehist_update_runtime_stats(tr, t, tprev);
if (sched->idle_hist) {
struct idle_thread_runtime *itr = (void *)tr;
@@ -2857,6 +2858,9 @@
printf("\nIdle stats:\n");
for (i = 0; i < idle_max_cpu; ++i) {
+ if (cpu_list && !test_bit(i, cpu_bitmap))
+ continue;
+
t = idle_threads[i];
if (!t)
continue;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index 994c230..7c64134 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1746,6 +1746,7 @@
goto out_delete_evlist;
}
+#ifdef HAVE_LIBBPF_SUPPORT
if (!top.record_opts.no_bpf_event) {
top.sb_evlist = evlist__new();
@@ -1759,6 +1760,7 @@
goto out_delete_evlist;
}
}
+#endif
if (perf_evlist__start_sb_thread(top.sb_evlist, target)) {
pr_debug("Couldn't start the BPF side band thread:\nBPF programs starting from now on won't be annotatable\n");
diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c
index 5d20bf8..cd77e334 100644
--- a/tools/perf/tests/bpf.c
+++ b/tools/perf/tests/bpf.c
@@ -197,7 +197,7 @@
perf_mmap__read_done(&md->core);
}
- if (count != expect) {
+ if (count != expect * evlist->core.nr_entries) {
pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count);
goto out_delete_evlist;
}
diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c
index 7f9f87a..aae0fd9 100644
--- a/tools/perf/tests/parse-events.c
+++ b/tools/perf/tests/parse-events.c
@@ -719,7 +719,7 @@
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
@@ -842,7 +842,7 @@
TEST_ASSERT_VAL("wrong exclude_user", !evsel->core.attr.exclude_user);
TEST_ASSERT_VAL("wrong exclude_kernel", evsel->core.attr.exclude_kernel);
TEST_ASSERT_VAL("wrong exclude_hv", evsel->core.attr.exclude_hv);
- TEST_ASSERT_VAL("wrong exclude guest", !evsel->core.attr.exclude_guest);
+ TEST_ASSERT_VAL("wrong exclude guest", evsel->core.attr.exclude_guest);
TEST_ASSERT_VAL("wrong exclude host", !evsel->core.attr.exclude_host);
TEST_ASSERT_VAL("wrong precise_ip", !evsel->core.attr.precise_ip);
TEST_ASSERT_VAL("wrong leader", evsel__is_group_leader(evsel));
diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c
index fc0838a..23db8ac 100644
--- a/tools/perf/tests/parse-metric.c
+++ b/tools/perf/tests/parse-metric.c
@@ -70,6 +70,9 @@
{
.metric_expr = "1/m3",
.metric_name = "M3",
+},
+{
+ .name = NULL,
}
};
diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c
index be9c4c0..a07626f 100644
--- a/tools/perf/ui/browsers/hists.c
+++ b/tools/perf/ui/browsers/hists.c
@@ -3629,8 +3629,8 @@
{
int nr_entries = evlist->core.nr_entries;
-single_entry:
if (perf_evlist__single_entry(evlist)) {
+single_entry: {
struct evsel *first = evlist__first(evlist);
return perf_evsel__hists_browse(first, nr_entries, help,
@@ -3638,6 +3638,7 @@
env, warn_lost_event,
annotation_opts);
}
+ }
if (symbol_conf.event_group) {
struct evsel *pos;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 302a14d..93e063f 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -182,15 +182,15 @@
if (payload & BIT(EV_TLB_ACCESS))
decoder->record.type |= ARM_SPE_TLB_ACCESS;
- if ((idx == 1 || idx == 2 || idx == 3) &&
+ if ((idx == 2 || idx == 4 || idx == 8) &&
(payload & BIT(EV_LLC_MISS)))
decoder->record.type |= ARM_SPE_LLC_MISS;
- if ((idx == 1 || idx == 2 || idx == 3) &&
+ if ((idx == 2 || idx == 4 || idx == 8) &&
(payload & BIT(EV_LLC_ACCESS)))
decoder->record.type |= ARM_SPE_LLC_ACCESS;
- if ((idx == 1 || idx == 2 || idx == 3) &&
+ if ((idx == 2 || idx == 4 || idx == 8) &&
(payload & BIT(EV_REMOTE_ACCESS)))
decoder->record.type |= ARM_SPE_REMOTE_ACCESS;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index c283223..a2a369e 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -1344,8 +1344,15 @@
attr.sample_type &= ~(u64)PERF_SAMPLE_ADDR;
}
- if (etm->synth_opts.last_branch)
+ if (etm->synth_opts.last_branch) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
if (etm->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 2a8d245..0af4e81 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -3017,8 +3017,15 @@
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
- if (pt->synth_opts.last_branch)
+ if (pt->synth_opts.last_branch) {
attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ /*
+ * We don't use the hardware index, but the sample generation
+ * code uses the new format branch_stack with this field,
+ * so the event attributes must indicate that it's present.
+ */
+ attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX;
+ }
if (pt->synth_opts.instructions) {
attr.config = PERF_COUNT_HW_INSTRUCTIONS;
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 9f7260e..c4d2394 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -37,6 +37,7 @@
#include "util/evsel_config.h"
#include "util/event.h"
#include "util/pfm.h"
+#include "perf.h"
#define MAX_NAME_LEN 100
@@ -1533,19 +1534,23 @@
evsel = __add_event(list, &parse_state->idx, &attr, true,
get_config_name(head_config), pmu,
&config_terms, auto_merge_stats, NULL);
- if (evsel) {
- evsel->unit = info.unit;
- evsel->scale = info.scale;
- evsel->per_pkg = info.per_pkg;
- evsel->snapshot = info.snapshot;
- evsel->metric_expr = info.metric_expr;
- evsel->metric_name = info.metric_name;
- evsel->pmu_name = name ? strdup(name) : NULL;
- evsel->use_uncore_alias = use_uncore_alias;
- evsel->percore = config_term_percore(&evsel->config_terms);
- }
+ if (!evsel)
+ return -ENOMEM;
- return evsel ? 0 : -ENOMEM;
+ evsel->pmu_name = name ? strdup(name) : NULL;
+ evsel->use_uncore_alias = use_uncore_alias;
+ evsel->percore = config_term_percore(&evsel->config_terms);
+
+ if (parse_state->fake_pmu)
+ return 0;
+
+ evsel->unit = info.unit;
+ evsel->scale = info.scale;
+ evsel->per_pkg = info.per_pkg;
+ evsel->snapshot = info.snapshot;
+ evsel->metric_expr = info.metric_expr;
+ evsel->metric_name = info.metric_name;
+ return 0;
}
int parse_events_multi_pmu_add(struct parse_events_state *parse_state,
@@ -1794,6 +1799,8 @@
if (*str == 'u') {
if (!exclude)
exclude = eu = ek = eh = 1;
+ if (!exclude_GH && !perf_guest)
+ eG = 1;
eu = 0;
} else if (*str == 'k') {
if (!exclude)
diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c
index ffbc9d3..7a5f037 100644
--- a/tools/perf/util/session.c
+++ b/tools/perf/util/session.c
@@ -87,7 +87,7 @@
session->decomp_last = decomp;
}
- pr_debug("decomp (B): %ld to %ld\n", src_size, decomp_size);
+ pr_debug("decomp (B): %zd to %zd\n", src_size, decomp_size);
return 0;
}
diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index 57d0706..493ec37 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -117,7 +117,7 @@
cpu_map__id_to_die(id),
config->csv_output ? 0 : -3,
cpu_map__id_to_cpu(id), config->csv_sep);
- } else {
+ } else if (id > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
evsel__cpus(evsel)->map[id],
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index 1f5fcb8..5151a8c 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -663,6 +663,7 @@
"exit_idle",
"mwait_idle",
"mwait_idle_with_hints",
+ "mwait_idle_with_hints.constprop.0",
"poll_idle",
"ppc64_runlatch_off",
"pseries_dedicated_idle_sleep",
diff --git a/tools/perf/util/zstd.c b/tools/perf/util/zstd.c
index d2202392..48dd2b0 100644
--- a/tools/perf/util/zstd.c
+++ b/tools/perf/util/zstd.c
@@ -99,7 +99,7 @@
while (input.pos < input.size) {
ret = ZSTD_decompressStream(data->dstream, &output, &input);
if (ZSTD_isError(ret)) {
- pr_err("failed to decompress (B): %ld -> %ld, dst_size %ld : %s\n",
+ pr_err("failed to decompress (B): %zd -> %zd, dst_size %zd : %s\n",
src_size, output.size, dst_size, ZSTD_getErrorName(ret));
break;
}