blob: dafffb6db4ca8eb60b1b38ece705aaa9c46044cb [file] [log] [blame]
From 54d71a2b4bbe887691fb9223c8ffd678132679f2 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Tue, 25 Aug 2009 15:51:05 +0200
Subject: [PATCH] perf_events: defer poll() wakeups to softirq on RT
commit 0a764c16d61b4d5ec9a9fadae931c79484a274cd in tip, also see
merge artefact 2e5f15f4c51efd5fdb1ea97380c38d3f8c32d3e9 in tip.
Normally pending work is work that cannot be done from NMI context, such
as wakeups and disabling the counter. The pending work is a singly
linked list using atomic ops so that it functions from NMI context.
Normally this is called from IRQ context through use of a self-IPI
(x86) or upon enabling hard interrupts (powerpc). Architectures that do
not implement set_perf_event_pending() nor call
perf_event_do_pending() upon leaving NMI context will get a polling
fallback from the timer softirq.
However, in -rt we cannot do the wakeup from IRQ context because it's a
wait_queue wakeup, which can be O(n), so defer all wakeups to the softirq
fallback by creating a second pending list that's only processed from
there.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
LKML-Reference: <1251208265.7538.1157.camel@twins>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index a177698..f57b3ab 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -645,6 +645,9 @@ struct perf_event {
int pending_kill;
int pending_disable;
struct perf_pending_entry pending;
+#ifdef CONFIG_PREEMPT_RT
+ struct perf_pending_entry pending_softirq;
+#endif
atomic_t event_limit;
@@ -753,6 +756,7 @@ extern void perf_event_exit_task(struct task_struct *child);
extern void perf_event_free_task(struct task_struct *task);
extern void set_perf_event_pending(void);
extern void perf_event_do_pending(void);
+extern void perf_event_do_pending_softirq(void);
extern void perf_event_print_debug(void);
extern void __perf_disable(void);
extern bool __perf_enable(void);
@@ -883,6 +887,7 @@ static inline int perf_event_init_task(struct task_struct *child) { return 0; }
static inline void perf_event_exit_task(struct task_struct *child) { }
static inline void perf_event_free_task(struct task_struct *task) { }
static inline void perf_event_do_pending(void) { }
+static inline void perf_event_do_pending_softirq(void) { }
static inline void perf_event_print_debug(void) { }
static inline void perf_disable(void) { }
static inline void perf_enable(void) { }
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 2ae7409..ee1d0ab 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -2560,11 +2560,26 @@ static void perf_pending_event(struct perf_pending_entry *entry)
__perf_event_disable(event);
}
+#ifndef CONFIG_PREEMPT_RT
if (event->pending_wakeup) {
event->pending_wakeup = 0;
perf_event_wakeup(event);
}
+#endif
+}
+
+#ifdef CONFIG_PREEMPT_RT
+static void perf_pending_counter_softirq(struct perf_pending_entry *entry)
+{
+ struct perf_event *counter = container_of(entry,
+ struct perf_event, pending_softirq);
+
+ if (counter->pending_wakeup) {
+ counter->pending_wakeup = 0;
+ perf_event_wakeup(counter);
+ }
}
+#endif
#define PENDING_TAIL ((struct perf_pending_entry *)-1UL)
@@ -2572,33 +2587,42 @@ static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_head) = {
PENDING_TAIL,
};
-static void perf_pending_queue(struct perf_pending_entry *entry,
- void (*func)(struct perf_pending_entry *))
-{
- struct perf_pending_entry **head;
+static DEFINE_PER_CPU(struct perf_pending_entry *, perf_pending_softirq_head) = {
+ PENDING_TAIL,
+};
+static void __perf_pending_queue(struct perf_pending_entry **head,
+ struct perf_pending_entry *entry,
+ void (*func)(struct perf_pending_entry *))
+{
if (cmpxchg(&entry->next, NULL, PENDING_TAIL) != NULL)
return;
entry->func = func;
- head = &get_cpu_var(perf_pending_head);
-
do {
entry->next = *head;
} while (cmpxchg(head, entry->next, entry) != entry->next);
+}
- set_perf_event_pending();
+static void perf_pending_queue(struct perf_pending_entry *entry,
+ void (*func)(struct perf_pending_entry *))
+{
+ struct perf_pending_entry **head;
+ head = &get_cpu_var(perf_pending_head);
+ __perf_pending_queue(head, entry, func);
put_cpu_var(perf_pending_head);
+
+ set_perf_event_pending();
}
-static int __perf_pending_run(void)
+static int __perf_pending_run(struct perf_pending_entry **head)
{
struct perf_pending_entry *list;
int nr = 0;
- list = xchg(&__get_cpu_var(perf_pending_head), PENDING_TAIL);
+ list = xchg(head, PENDING_TAIL);
while (list != PENDING_TAIL) {
void (*func)(struct perf_pending_entry *);
struct perf_pending_entry *entry = list;
@@ -2628,7 +2652,8 @@ static inline int perf_not_pending(struct perf_event *event)
* need to wait.
*/
get_cpu();
- __perf_pending_run();
+ __perf_pending_run(&__get_cpu_var(perf_pending_head));
+ __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
put_cpu();
/*
@@ -2646,7 +2671,13 @@ static void perf_pending_sync(struct perf_event *event)
void perf_event_do_pending(void)
{
- __perf_pending_run();
+ __perf_pending_run(&__get_cpu_var(perf_pending_head));
+}
+
+void perf_event_do_pending_softirq(void)
+{
+ __perf_pending_run(&__get_cpu_var(perf_pending_head));
+ __perf_pending_run(&__get_cpu_var(perf_pending_softirq_head));
}
/*
@@ -2684,12 +2715,18 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
{
atomic_set(&handle->data->poll, POLL_IN);
+#ifndef CONFIG_PREEMPT_RT
if (handle->nmi) {
handle->event->pending_wakeup = 1;
perf_pending_queue(&handle->event->pending,
perf_pending_event);
} else
perf_event_wakeup(handle->event);
+#else
+ __perf_pending_queue(&__get_cpu_var(perf_pending_softirq_head),
+ &handle->event->pending_softirq,
+ perf_pending_counter_softirq);
+#endif
}
/*
diff --git a/kernel/timer.c b/kernel/timer.c
index f4978ac..c850d06 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -1278,6 +1278,7 @@ static void run_timer_softirq(struct softirq_action *h)
printk_tick();
hrtimer_run_pending();
+ perf_event_do_pending_softirq();
if (time_after_eq(jiffies, base->timer_jiffies))
__run_timers(base);
--
1.7.1.1