From 47d053171f1c30bbe93d818b7b94ff89bec58055 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:19 -0500
Subject: [PATCH] revert preempt BKL revert

commit 37ffffaf131b6620d27af5a1477f6db507718018 in tip.

[ basically, this is the -R of 8e3e076c5a78519a9f64cd384e8f18bc21882ce0 ]

While we understand that a preemptible BKL is not a brilliant idea in
the first place, we do not have the developer capacity to fix all the
BKL leftovers right away. For PREEMPT-RT we rely on the preemptible
BKL for now. We are still looking into removing the BKL completely,
and it is high on our todo list.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b3876..5d79504 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -92,14 +92,6 @@
  */
 #define in_nmi() (preempt_count() & NMI_MASK)
 
-#if defined(CONFIG_PREEMPT)
-# define PREEMPT_INATOMIC_BASE kernel_locked()
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_INATOMIC_BASE 0
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
 /*
  * Are we running in atomic context? WARNING: this macro cannot
  * always detect atomic context; in particular, it cannot know about
@@ -107,11 +99,17 @@
  * used in the general case to determine whether sleeping is possible.
  * Do not use in_atomic() in driver code.
  */
-#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
+#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+
+#ifdef CONFIG_PREEMPT
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
 
 /*
  * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
+ * (used by the scheduler)
  */
 #define in_atomic_preempt_off() \
 		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
diff --git a/kernel/sched.c b/kernel/sched.c
index b859b1e..d5034c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5639,6 +5639,8 @@ out:
 asmlinkage void __sched preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
+	struct task_struct *task = current;
+	int saved_lock_depth;
 
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -5649,7 +5651,16 @@ asmlinkage void __sched preempt_schedule(void)
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesn't
+		 * auto-release the semaphore:
+		 */
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 		schedule();
+		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -5670,15 +5681,26 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
 	struct thread_info *ti = current_thread_info();
+	struct task_struct *task = current;
+	int saved_lock_depth;
 
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesn't
+		 * auto-release the semaphore:
+		 */
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 		local_irq_enable();
 		schedule();
 		local_irq_disable();
+		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -7058,11 +7080,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-#if defined(CONFIG_PREEMPT)
-	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
-#else
 	task_thread_info(idle)->preempt_count = 0;
-#endif
+
 	/*
 	 * The idle tasks have their own, simple scheduling class:
 	 */
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5354922..2c9b548 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -14,107 +14,56 @@
 #include <trace/events/bkl.h>
 
 /*
- * The 'big kernel lock'
+ * The 'big kernel semaphore'
  *
- * This spinlock is taken and released recursively by lock_kernel()
+ * This mutex is taken and released recursively by lock_kernel()
  * and unlock_kernel(). It is transparently dropped and reacquired
  * over schedule(). It is used to protect legacy code that hasn't
  * been migrated to a proper locking design yet.
  *
+ * Note: code locked by this semaphore will only be serialized against
+ * other code using the same locking facility. The code guarantees that
+ * the task remains on the same CPU.
+ *
  * Don't use in new code.
  */
-static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
-
+static struct semaphore kernel_sem;
 
 /*
- * Acquire/release the underlying lock from the scheduler.
+ * Re-acquire the kernel semaphore.
  *
- * This is called with preemption disabled, and should
- * return an error value if it cannot get the lock and
- * TIF_NEED_RESCHED gets set.
+ * This function is called with preemption off.
  *
- * If it successfully gets the lock, it should increment
- * the preemption count like any spinlock does.
- *
- * (This works on UP too - do_raw_spin_trylock will never
- * return false in that case)
+ * We are executing in schedule() so the code must be extremely careful
+ * about recursion, both due to the down() and due to the enabling of
+ * preemption. schedule() will re-check the preemption flag after
+ * reacquiring the semaphore.
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-	while (!do_raw_spin_trylock(&kernel_flag)) {
-		if (need_resched())
-			return -EAGAIN;
-		cpu_relax();
-	}
-	preempt_disable();
-	return 0;
-}
+	int saved_lock_depth = current->lock_depth;
 
-void __lockfunc __release_kernel_lock(void)
-{
-	do_raw_spin_unlock(&kernel_flag);
-	__preempt_enable_no_resched();
-}
+	BUG_ON(saved_lock_depth < 0);
 
-/*
- * These are the BKL spinlocks - we try to be polite about preemption.
- * If SMP is not on (ie UP preemption), this all goes away because the
- * do_raw_spin_trylock() will always succeed.
- */
-#ifdef CONFIG_PREEMPT
-static inline void __lock_kernel(void)
-{
-	preempt_disable();
-	if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
-		/*
-		 * If preemption was disabled even before this
-		 * was called, there's nothing we can be polite
-		 * about - just spin.
-		 */
-		if (preempt_count() > 1) {
-			do_raw_spin_lock(&kernel_flag);
-			return;
-		}
+	current->lock_depth = -1;
+	local_irq_enable();
 
-		/*
-		 * Otherwise, let's wait for the kernel lock
-		 * with preemption enabled..
-		 */
-		do {
-			preempt_enable();
-			while (raw_spin_is_locked(&kernel_flag))
-				cpu_relax();
-			preempt_disable();
-		} while (!do_raw_spin_trylock(&kernel_flag));
-	}
-}
+	down(&kernel_sem);
 
-#else
+	preempt_disable();
+	local_irq_disable();
+	current->lock_depth = saved_lock_depth;
 
-/*
- * Non-preemption case - just get the spinlock
- */
-static inline void __lock_kernel(void)
-{
-	do_raw_spin_lock(&kernel_flag);
+	return 0;
 }
-#endif
 
-static inline void __unlock_kernel(void)
+void __lockfunc __release_kernel_lock(void)
 {
-	/*
-	 * the BKL is not covered by lockdep, so we open-code the
-	 * unlocking sequence (and thus avoid the dep-chain ops):
-	 */
-	do_raw_spin_unlock(&kernel_flag);
-	preempt_enable();
+	up(&kernel_sem);
 }
 
 /*
- * Getting the big kernel lock.
- *
- * This cannot happen asynchronously, so we only need to
- * worry about other CPU's.
+ * Getting the big kernel semaphore.
  */
 void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
@@ -124,7 +73,10 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 
 	if (likely(!depth)) {
 		might_sleep();
-		__lock_kernel();
+		/*
+		 * No recursion worries - we set up lock_depth _after_
+		 */
+		down(&kernel_sem);
 	}
 	current->lock_depth = depth;
 }
@@ -132,8 +84,9 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
 	BUG_ON(current->lock_depth < 0);
+
 	if (likely(--current->lock_depth < 0))
-		__unlock_kernel();
+		up(&kernel_sem);
 
 	trace_unlock_kernel(func, file, line);
 }
-- 
1.7.1.1
