perf: Better track self-monitoring events

Track (and constrain) self-monitoring events.

Consider events created with current as the target task, and not marked
inherit, to be self-monitor events. Subsequently, don't update the
userpage self-monitor data for any event that isn't considered
self-monitor.
This has the potential to break some really dodgy edge cases where
people use !task events on themselves.

The benefit is tighter control and less overhead for !self events.

Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
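---

For context: a "self-monitor" event in the sense used above is one a
task opens on itself with attr.inherit clear; only those keep the
mmap()'ed userpage data up to date after this patch. A minimal
userspace sketch (illustrative only, not part of this patch; error
handling trimmed):

	#include <linux/perf_event.h>
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		struct perf_event_attr attr;
		struct perf_event_mmap_page *pc;
		int fd;

		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_INSTRUCTIONS;
		attr.inherit = 0;	/* inherited events are not self-monitor */

		/* pid == 0: target is current, so PERF_ATTACH_SELF gets set */
		fd = syscall(SYS_perf_event_open, &attr, 0, -1, -1, 0);
		if (fd < 0)
			return 1;

		/* the first page of the mapping is the userpage */
		pc = mmap(NULL, sysconf(_SC_PAGESIZE), PROT_READ, MAP_SHARED, fd, 0);
		if (pc == MAP_FAILED)
			return 1;

		printf("cap_user_rdpmc=%u index=%u\n",
		       (unsigned int)pc->cap_user_rdpmc, pc->index);
		return 0;
	}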
diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 0905064..cf9bdb2 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -2709,13 +2709,17 @@ void arch_perf_update_userpage(struct perf_event *event,
 	struct cyc2ns_data data;
 	u64 offset;
 
-	userpg->cap_user_time = 0;
-	userpg->cap_user_time_zero = 0;
-	userpg->cap_user_rdpmc =
-		!!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
+	userpg->cap_user_rdpmc = !!(event->hw.flags & PERF_EVENT_FLAG_USER_READ_CNT);
 	userpg->pmc_width = x86_pmu.cntval_bits;
 
-	if (!using_native_sched_clock() || !sched_clock_stable())
+	if (unlikely(!using_native_sched_clock() || !sched_clock_stable())) {
+		userpg->cap_user_time = 0;
+		userpg->cap_user_time_zero = 0;
+		return;
+	}
+
+	/* already set the time fields before */
+	if (likely(userpg->cap_user_time))
 		return;
 
 	cyc2ns_read_begin(&data);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index d2a15c0..ab0cad2 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -635,6 +635,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_ITRACE	0x10
 #define PERF_ATTACH_SCHED_CB	0x20
 #define PERF_ATTACH_CHILD	0x40
+#define PERF_ATTACH_SELF	0x80
 
 struct bpf_prog;
 struct perf_cgroup;
@@ -928,6 +929,7 @@ struct perf_event_context {
 	int			nr_events;
 	int			nr_user;
+	int			nr_self;
 	int			is_active;
 
 	int			nr_task_data;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 0825098..9746163 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -1797,6 +1797,8 @@ list_add_event(struct perf_event *event, struct perf_event_context *ctx)
ctx->nr_user++;
if (event->attr.inherit_stat)
ctx->nr_stat++;
+ if (event->attach_state & PERF_ATTACH_SELF)
+ ctx->nr_self++;
if (event->state > PERF_EVENT_STATE_OFF)
perf_cgroup_event_enable(event, ctx);
@@ -1996,6 +1998,8 @@ list_del_event(struct perf_event *event, struct perf_event_context *ctx)
 		ctx->nr_user--;
 	if (event->attr.inherit_stat)
 		ctx->nr_stat--;
+	if (event->attach_state & PERF_ATTACH_SELF)
+		ctx->nr_self--;
 
 	list_del_rcu(&event->event_entry);
@@ -3783,7 +3787,8 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx,
  */
 static inline bool event_update_userpage(struct perf_event *event)
 {
-	if (likely(!atomic_read(&event->mmap_count)))
+	if (likely(!atomic_read(&event->mmap_count) ||
+		   !(event->attach_state & PERF_ATTACH_SELF)))
 		return false;
 
 	perf_event_update_time(event);
@@ -3830,7 +3835,8 @@ static int merge_sched_in(struct perf_event *event, void *data)
 			event->pmu_ctx->rotate_necessary = 1;
 			cpc = this_cpu_ptr(event->pmu_ctx->pmu->cpu_pmu_context);
 			perf_mux_hrtimer_restart(cpc);
-			group_update_userpage(event);
+			if (ctx->nr_self)
+				group_update_userpage(event);
 		}
 	}
 
@@ -6087,6 +6093,9 @@ void perf_event_update_userpage(struct perf_event *event)
 	if (!rb)
 		goto unlock;
 
+	if (!(event->attach_state & PERF_ATTACH_SELF))
+		goto unlock;
+
 	/*
 	 * compute total_time_enabled, total_time_running
 	 * based on snapshot values taken when the event
@@ -11926,6 +11935,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 		 * pmu before we get a ctx.
 		 */
 		event->hw.target = get_task_struct(task);
+		if (event->hw.target == current && !attr->inherit)
+			event->attach_state |= PERF_ATTACH_SELF;
 	}
 
 	event->clock = &local_clock;
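
The userpage fields maintained by arch_perf_update_userpage() above
feed the lock-free read sequence documented in
include/uapi/linux/perf_event.h; after this patch only PERF_ATTACH_SELF
events keep them current. A sketch of that reader for x86 (illustrative
helper, not part of this patch; 'pc' is the userpage mapping from the
earlier example):

	#include <stdint.h>
	#include <x86intrin.h>		/* __rdpmc() */

	static uint64_t self_read_count(volatile struct perf_event_mmap_page *pc)
	{
		uint32_t seq, idx;
		uint64_t count;

		do {
			seq = pc->lock;
			__sync_synchronize();	/* pairs with kernel barriers */

			idx = pc->index;	/* hw counter index + 1; 0 if not active */
			count = pc->offset;
			if (pc->cap_user_rdpmc && idx) {
				uint64_t pmc = __rdpmc(idx - 1);
				int shift = 64 - pc->pmc_width;

				/* sign-extend the narrow PMC value */
				pmc = (uint64_t)((int64_t)(pmc << shift) >> shift);
				count += pmc;
			}

			__sync_synchronize();
		} while (pc->lock != seq);

		return count;
	}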