| From ab6f762f0f53162d41497708b33c9a3236d3609e Mon Sep 17 00:00:00 2001 |
| From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> |
| Date: Tue, 3 Mar 2020 20:30:02 +0900 |
| Subject: printk: queue wake_up_klogd irq_work only if per-CPU areas are ready |
| |
| From: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> |
| |
| commit ab6f762f0f53162d41497708b33c9a3236d3609e upstream. |
| |
| printk_deferred(), similarly to printk_safe/printk_nmi, does not |
| immediately attempt to print a new message on the consoles, avoiding |
| calls into non-reentrant kernel paths, e.g. scheduler or timekeeping, |
| which potentially can deadlock the system. |
| |
| Those printk() flavors, instead, rely on per-CPU flush irq_work to print |
| messages from safer contexts. For same reasons (recursive scheduler or |
| timekeeping calls) printk() uses per-CPU irq_work in order to wake up |
| user space syslog/kmsg readers. |
| |
| However, only printk_safe/printk_nmi do make sure that per-CPU areas |
| have been initialised and that it's safe to modify per-CPU irq_work. |
| This means that, for instance, should printk_deferred() be invoked "too |
| early", that is before per-CPU areas are initialised, printk_deferred() |
| will perform illegal per-CPU access. |
| |
| Lech Perczak [0] reports that after commit 1b710b1b10ef ("char/random: |
| silence a lockdep splat with printk()") user-space syslog/kmsg readers |
| are not able to read new kernel messages. |
| |
| The reason is printk_deferred() being called too early (as was pointed |
| out by Petr and John). |
| |
| Fix printk_deferred() and do not queue per-CPU irq_work before per-CPU |
| areas are initialized. |
| |
| Link: https://lore.kernel.org/lkml/aa0732c6-5c4e-8a8b-a1c1-75ebe3dca05b@camlintechnologies.com/ |
| Reported-by: Lech Perczak <l.perczak@camlintechnologies.com> |
| Signed-off-by: Sergey Senozhatsky <sergey.senozhatsky@gmail.com> |
| Tested-by: Jann Horn <jannh@google.com> |
| Reviewed-by: Petr Mladek <pmladek@suse.com> |
| Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| Cc: Theodore Ts'o <tytso@mit.edu> |
| Cc: John Ogness <john.ogness@linutronix.de> |
| Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org> |
| Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org> |
| |
| --- |
| include/linux/printk.h | 5 ----- |
| init/main.c | 1 - |
| kernel/printk/internal.h | 5 +++++ |
| kernel/printk/printk.c | 34 ++++++++++++++++++++++++++++++++++ |
| kernel/printk/printk_safe.c | 11 +---------- |
| 5 files changed, 40 insertions(+), 16 deletions(-) |
| |
| --- a/include/linux/printk.h |
| +++ b/include/linux/printk.h |
| @@ -206,7 +206,6 @@ __printf(1, 2) void dump_stack_set_arch_ |
| void dump_stack_print_info(const char *log_lvl); |
| void show_regs_print_info(const char *log_lvl); |
| extern asmlinkage void dump_stack(void) __cold; |
| -extern void printk_safe_init(void); |
| extern void printk_safe_flush(void); |
| extern void printk_safe_flush_on_panic(void); |
| #else |
| @@ -273,10 +272,6 @@ static inline asmlinkage void dump_stack |
| { |
| } |
| |
| -static inline void printk_safe_init(void) |
| -{ |
| -} |
| - |
| static inline void printk_safe_flush(void) |
| { |
| } |
| --- a/init/main.c |
| +++ b/init/main.c |
| @@ -641,7 +641,6 @@ asmlinkage __visible void __init start_k |
| softirq_init(); |
| timekeeping_init(); |
| time_init(); |
| - printk_safe_init(); |
| perf_event_init(); |
| profile_init(); |
| call_function_init(); |
| --- a/kernel/printk/internal.h |
| +++ b/kernel/printk/internal.h |
| @@ -35,6 +35,9 @@ __printf(1, 0) int vprintk_func(const ch |
| void __printk_safe_enter(void); |
| void __printk_safe_exit(void); |
| |
| +void printk_safe_init(void); |
| +bool printk_percpu_data_ready(void); |
| + |
| #define printk_safe_enter_irqsave(flags) \ |
| do { \ |
| local_irq_save(flags); \ |
| @@ -76,4 +79,6 @@ __printf(1, 0) int vprintk_func(const ch |
| #define printk_safe_enter_irq() local_irq_disable() |
| #define printk_safe_exit_irq() local_irq_enable() |
| |
| +static inline void printk_safe_init(void) { } |
| +static inline bool printk_percpu_data_ready(void) { return false; } |
| #endif /* CONFIG_PRINTK */ |
| --- a/kernel/printk/printk.c |
| +++ b/kernel/printk/printk.c |
| @@ -443,6 +443,18 @@ static char __log_buf[__LOG_BUF_LEN] __a |
| static char *log_buf = __log_buf; |
| static u32 log_buf_len = __LOG_BUF_LEN; |
| |
| +/* |
| + * We cannot access per-CPU data (e.g. per-CPU flush irq_work) before |
| + * per_cpu_areas are initialised. This variable is set to true when |
| + * it's safe to access per-CPU data. |
| + */ |
| +static bool __printk_percpu_data_ready __read_mostly; |
| + |
| +bool printk_percpu_data_ready(void) |
| +{ |
| + return __printk_percpu_data_ready; |
| +} |
| + |
| /* Return log buffer address */ |
| char *log_buf_addr_get(void) |
| { |
| @@ -1101,12 +1113,28 @@ static void __init log_buf_add_cpu(void) |
| static inline void log_buf_add_cpu(void) {} |
| #endif /* CONFIG_SMP */ |
| |
| +static void __init set_percpu_data_ready(void) |
| +{ |
| + printk_safe_init(); |
| + /* Make sure we set this flag only after printk_safe() init is done */ |
| + barrier(); |
| + __printk_percpu_data_ready = true; |
| +} |
| + |
| void __init setup_log_buf(int early) |
| { |
| unsigned long flags; |
| char *new_log_buf; |
| unsigned int free; |
| |
| + /* |
| + * Some archs call setup_log_buf() multiple times - first is very |
| + * early, e.g. from setup_arch(), and second - when percpu_areas |
| + * are initialised. |
| + */ |
| + if (!early) |
| + set_percpu_data_ready(); |
| + |
| if (log_buf != __log_buf) |
| return; |
| |
| @@ -2913,6 +2941,9 @@ static DEFINE_PER_CPU(struct irq_work, w |
| |
| void wake_up_klogd(void) |
| { |
| + if (!printk_percpu_data_ready()) |
| + return; |
| + |
| preempt_disable(); |
| if (waitqueue_active(&log_wait)) { |
| this_cpu_or(printk_pending, PRINTK_PENDING_WAKEUP); |
| @@ -2923,6 +2954,9 @@ void wake_up_klogd(void) |
| |
| void defer_console_output(void) |
| { |
| + if (!printk_percpu_data_ready()) |
| + return; |
| + |
| preempt_disable(); |
| __this_cpu_or(printk_pending, PRINTK_PENDING_OUTPUT); |
| irq_work_queue(this_cpu_ptr(&wake_up_klogd_work)); |
| --- a/kernel/printk/printk_safe.c |
| +++ b/kernel/printk/printk_safe.c |
| @@ -39,7 +39,6 @@ |
| * There are situations when we want to make sure that all buffers |
| * were handled or when IRQs are blocked. |
| */ |
| -static int printk_safe_irq_ready __read_mostly; |
| |
| #define SAFE_LOG_BUF_LEN ((1 << CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT) - \ |
| sizeof(atomic_t) - \ |
| @@ -63,7 +62,7 @@ static DEFINE_PER_CPU(struct printk_safe |
| /* Get flushed in a more safe context. */ |
| static void queue_flush_work(struct printk_safe_seq_buf *s) |
| { |
| - if (printk_safe_irq_ready) |
| + if (printk_percpu_data_ready()) |
| irq_work_queue(&s->work); |
| } |
| |
| @@ -414,14 +413,6 @@ void __init printk_safe_init(void) |
| #endif |
| } |
| |
| - /* |
| - * In the highly unlikely event that a NMI were to trigger at |
| - * this moment. Make sure IRQ work is set up before this |
| - * variable is set. |
| - */ |
| - barrier(); |
| - printk_safe_irq_ready = 1; |
| - |
| /* Flush pending messages that did not have scheduled IRQ works. */ |
| printk_safe_flush(); |
| } |