From 7a62893f99991b01729034c1198dd3c2a72ea0df Mon Sep 17 00:00:00 2001
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 27 Sep 2011 13:56:50 -0400
Subject: [PATCH 156/262] ring-buffer: Convert reader_lock from raw_spin_lock
 into spin_lock

The reader_lock is mostly taken in normal context with interrupts enabled.
But because ftrace_dump() can happen anywhere, it is used as a spin lock,
and in some cases a check of in_nmi() is performed to determine whether
the ftrace_dump() was initiated from an NMI; if it was, the lock is not
taken.

But having the lock as a raw_spin_lock() causes issues with the real-time
kernel, as the lock is held during allocation and freeing of the buffer.
Since memory locks convert into mutexes on PREEMPT_RT, keeping the
reader_lock as a raw_spin_lock causes problems there.

Converting the reader_lock is not straightforward, as we must still deal
with ftrace_dump() happening not only from an NMI but also from true
interrupt context on PREEMPT_RT.

Two wrapper functions are created to take and release the reader lock:

  int read_buffer_lock(cpu_buffer, unsigned long *flags)
  void read_buffer_unlock(cpu_buffer, unsigned long flags, int locked)

read_buffer_lock() returns 1 if it actually took the lock, in which case
it has disabled interrupts and updated the flags. The only time it returns
0 is when a ftrace_dump() happens in an unsafe context and the trylock
fails.

read_buffer_unlock() checks the locked argument and simply unlocks the
spin lock if it was successfully taken.

Instead of doing this only in the specific paths that an NMI might call
into, all users of the reader_lock are converted to the wrapper functions,
which makes the code a bit simpler to read and less error prone.
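
For illustration only (cpu_buffer stands for any struct
ring_buffer_per_cpu pointer and the critical section is a placeholder),
a converted call site follows this pattern:

  unsigned long flags;
  int locked;

  locked = read_buffer_lock(cpu_buffer, &flags);
  /* reader critical section; in the unsafe (NMI) case this runs unlocked */
  read_buffer_unlock(cpu_buffer, flags, locked);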

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Clark Williams <clark@redhat.com>
Link: http://lkml.kernel.org/r/1317146210.26514.33.camel@gandalf.stny.rr.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
 kernel/trace/ring_buffer.c |  151 ++++++++++++++++++++++++--------------------
 1 file changed, 81 insertions(+), 70 deletions(-)

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index cf8d11e..24efd16 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -446,7 +446,7 @@ struct ring_buffer_per_cpu {
 	int				cpu;
 	atomic_t			record_disabled;
 	struct ring_buffer		*buffer;
-	raw_spinlock_t			reader_lock;	/* serialize readers */
+	spinlock_t			reader_lock;	/* serialize readers */
 	arch_spinlock_t			lock;
 	struct lock_class_key		lock_key;
 	struct list_head		*pages;
@@ -1017,6 +1017,44 @@ static int rb_allocate_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	return -ENOMEM;
 }
 
+static inline int ok_to_lock(void)
+{
+	if (in_nmi())
+		return 0;
+#ifdef CONFIG_PREEMPT_RT_FULL
+	if (in_atomic())
+		return 0;
+#endif
+	return 1;
+}
+
+static int
+read_buffer_lock(struct ring_buffer_per_cpu *cpu_buffer,
+		 unsigned long *flags)
+{
+	/*
+	 * If an NMI die dumps out the content of the ring buffer
+	 * do not grab locks. We also permanently disable the ring
+	 * buffer too. A one time deal is all you get from reading
+	 * the ring buffer from an NMI.
+	 */
+	if (!ok_to_lock()) {
+		if (spin_trylock_irqsave(&cpu_buffer->reader_lock, *flags))
+			return 1;
+		tracing_off_permanent();
+		return 0;
+	}
+	spin_lock_irqsave(&cpu_buffer->reader_lock, *flags);
+	return 1;
+}
+
+static void
+read_buffer_unlock(struct ring_buffer_per_cpu *cpu_buffer,
+		   unsigned long flags, int locked)
+{
+	if (locked)
+		spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+}
 static struct ring_buffer_per_cpu *
 rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 {
@@ -1032,7 +1070,7 @@ rb_allocate_cpu_buffer(struct ring_buffer *buffer, int cpu)
 
 	cpu_buffer->cpu = cpu;
 	cpu_buffer->buffer = buffer;
-	raw_spin_lock_init(&cpu_buffer->reader_lock);
+	spin_lock_init(&cpu_buffer->reader_lock);
 	lockdep_set_class(&cpu_buffer->reader_lock, buffer->reader_lock_key);
 	cpu_buffer->lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
 
@@ -1227,9 +1265,11 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 {
 	struct buffer_page *bpage;
 	struct list_head *p;
+	unsigned long flags;
 	unsigned i;
+	int locked;
 
-	raw_spin_lock_irq(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	rb_head_page_deactivate(cpu_buffer);
 
 	for (i = 0; i < nr_pages; i++) {
@@ -1247,7 +1287,7 @@ rb_remove_pages(struct ring_buffer_per_cpu *cpu_buffer, unsigned nr_pages)
 	rb_check_pages(cpu_buffer);
 
  out:
-	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 
 static void
@@ -1256,9 +1296,11 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 {
 	struct buffer_page *bpage;
 	struct list_head *p;
+	unsigned long flags;
 	unsigned i;
+	int locked;
 
-	raw_spin_lock_irq(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	rb_head_page_deactivate(cpu_buffer);
 
 	for (i = 0; i < nr_pages; i++) {
@@ -1273,7 +1315,7 @@ rb_insert_pages(struct ring_buffer_per_cpu *cpu_buffer,
 	rb_check_pages(cpu_buffer);
 
  out:
-	raw_spin_unlock_irq(&cpu_buffer->reader_lock);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 
 /**
@@ -2714,7 +2756,7 @@ unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 		return 0;
 
 	cpu_buffer = buffer->buffers[cpu];
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
 	/*
 	 * if the tail is on reader_page, oldest time stamp is on the reader
 	 * page
@@ -2724,7 +2766,7 @@ unsigned long ring_buffer_oldest_event_ts(struct ring_buffer *buffer, int cpu)
 	else
 		bpage = rb_set_head_page(cpu_buffer);
 	ret = bpage->page->time_stamp;
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
 
 	return ret;
 }
@@ -2888,15 +2930,16 @@ void ring_buffer_iter_reset(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
+	int locked;
 
 	if (!iter)
 		return;
 
 	cpu_buffer = iter->cpu_buffer;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	rb_iter_reset(iter);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_reset);
 
@@ -3314,21 +3357,6 @@ rb_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 }
 EXPORT_SYMBOL_GPL(ring_buffer_iter_peek);
 
-static inline int rb_ok_to_lock(void)
-{
-	/*
-	 * If an NMI die dumps out the content of the ring buffer
-	 * do not grab locks. We also permanently disable the ring
-	 * buffer too. A one time deal is all you get from reading
-	 * the ring buffer from an NMI.
-	 */
-	if (likely(!in_nmi()))
-		return 1;
-
-	tracing_off_permanent();
-	return 0;
-}
-
 /**
  * ring_buffer_peek - peek at the next event to be read
  * @buffer: The ring buffer to read
@@ -3346,22 +3374,17 @@ ring_buffer_peek(struct ring_buffer *buffer, int cpu, u64 *ts,
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	struct ring_buffer_event *event;
 	unsigned long flags;
-	int dolock;
+	int locked;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return NULL;
 
-	dolock = rb_ok_to_lock();
  again:
-	local_irq_save(flags);
-	if (dolock)
-		raw_spin_lock(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		rb_advance_reader(cpu_buffer);
-	if (dolock)
-		raw_spin_unlock(&cpu_buffer->reader_lock);
-	local_irq_restore(flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		goto again;
@@ -3383,11 +3406,12 @@ ring_buffer_iter_peek(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 	struct ring_buffer_event *event;
 	unsigned long flags;
+	int locked;
 
  again:
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	event = rb_iter_peek(iter, ts);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	if (event && event->type_len == RINGBUF_TYPE_PADDING)
 		goto again;
@@ -3413,9 +3437,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct ring_buffer_event *event = NULL;
 	unsigned long flags;
-	int dolock;
-
-	dolock = rb_ok_to_lock();
+	int locked;
 
  again:
 	/* might be called in atomic */
@@ -3425,9 +3447,7 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 		goto out;
 
 	cpu_buffer = buffer->buffers[cpu];
-	local_irq_save(flags);
-	if (dolock)
-		raw_spin_lock(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 
 	event = rb_buffer_peek(cpu_buffer, ts, lost_events);
 	if (event) {
@@ -3435,9 +3455,8 @@ ring_buffer_consume(struct ring_buffer *buffer, int cpu, u64 *ts,
 		rb_advance_reader(cpu_buffer);
 	}
 
-	if (dolock)
-		raw_spin_unlock(&cpu_buffer->reader_lock);
-	local_irq_restore(flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
+
 
  out:
 	preempt_enable();
@@ -3522,17 +3541,18 @@ ring_buffer_read_start(struct ring_buffer_iter *iter)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
+	int locked;
 
 	if (!iter)
 		return;
 
 	cpu_buffer = iter->cpu_buffer;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	arch_spin_lock(&cpu_buffer->lock);
 	rb_iter_reset(iter);
 	arch_spin_unlock(&cpu_buffer->lock);
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 }
 EXPORT_SYMBOL_GPL(ring_buffer_read_start);
 
@@ -3566,8 +3586,9 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
 	struct ring_buffer_event *event;
 	struct ring_buffer_per_cpu *cpu_buffer = iter->cpu_buffer;
 	unsigned long flags;
+	int locked;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
  again:
 	event = rb_iter_peek(iter, ts);
 	if (!event)
@@ -3578,7 +3599,7 @@ ring_buffer_read(struct ring_buffer_iter *iter, u64 *ts)
 
 	rb_advance_iter(iter);
  out:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	return event;
 }
@@ -3643,13 +3664,14 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];
 	unsigned long flags;
+	int locked;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return;
 
 	atomic_inc(&cpu_buffer->record_disabled);
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 
 	if (RB_WARN_ON(cpu_buffer, local_read(&cpu_buffer->committing)))
 		goto out;
@@ -3661,7 +3683,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu)
 	arch_spin_unlock(&cpu_buffer->lock);
 
  out:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	atomic_dec(&cpu_buffer->record_disabled);
 }
@@ -3688,22 +3710,16 @@ int ring_buffer_empty(struct ring_buffer *buffer)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
-	int dolock;
+	int locked;
 	int cpu;
 	int ret;
 
-	dolock = rb_ok_to_lock();
-
 	/* yes this is racy, but if you don't like the race, lock the buffer */
 	for_each_buffer_cpu(buffer, cpu) {
 		cpu_buffer = buffer->buffers[cpu];
-		local_irq_save(flags);
-		if (dolock)
-			raw_spin_lock(&cpu_buffer->reader_lock);
+		locked = read_buffer_lock(cpu_buffer, &flags);
 		ret = rb_per_cpu_empty(cpu_buffer);
-		if (dolock)
-			raw_spin_unlock(&cpu_buffer->reader_lock);
-		local_irq_restore(flags);
+		read_buffer_unlock(cpu_buffer, flags, locked);
 
 		if (!ret)
 			return 0;
@@ -3722,22 +3738,16 @@ int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu)
 {
 	struct ring_buffer_per_cpu *cpu_buffer;
 	unsigned long flags;
-	int dolock;
+	int locked;
 	int ret;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
 		return 1;
 
-	dolock = rb_ok_to_lock();
-
 	cpu_buffer = buffer->buffers[cpu];
-	local_irq_save(flags);
-	if (dolock)
-		raw_spin_lock(&cpu_buffer->reader_lock);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 	ret = rb_per_cpu_empty(cpu_buffer);
-	if (dolock)
-		raw_spin_unlock(&cpu_buffer->reader_lock);
-	local_irq_restore(flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
 	return ret;
 }
@@ -3912,6 +3922,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	unsigned int commit;
 	unsigned int read;
 	u64 save_timestamp;
+	int locked;
 	int ret = -1;
 
 	if (!cpumask_test_cpu(cpu, buffer->cpumask))
@@ -3933,7 +3944,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 	if (!bpage)
 		goto out;
 
-	raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags);
+	locked = read_buffer_lock(cpu_buffer, &flags);
 
 	reader = rb_get_reader_page(cpu_buffer);
 	if (!reader)
@@ -4057,7 +4068,7 @@ int ring_buffer_read_page(struct ring_buffer *buffer,
 		memset(&bpage->data[commit], 0, BUF_PAGE_SIZE - commit);
 
  out_unlock:
-	raw_spin_unlock_irqrestore(&cpu_buffer->reader_lock, flags);
+	read_buffer_unlock(cpu_buffer, flags, locked);
 
  out:
 	return ret;
-- 
1.7.10.4
