From 47d053171f1c30bbe93d818b7b94ff89bec58055 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 3 Jul 2009 08:29:19 -0500
Subject: [PATCH] revert preempt BKL revert

commit 37ffffaf131b6620d27af5a1477f6db507718018 in tip.

[ basically, this is the -R of 8e3e076c5a78519a9f64cd384e8f18bc21882ce0 ]

While we understand that a preemptible BKL is not a brilliant idea in
the first place, we do not have the developer capacity to fix all the
BKL leftovers right away. For PREEMPT-RT we rely on the preemptible
BKL for now. We are still looking into removing the BKL completely,
and it is high on our todo list.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>

diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h
index d5b3876..5d79504 100644
--- a/include/linux/hardirq.h
+++ b/include/linux/hardirq.h
@@ -92,14 +92,6 @@
  */
 #define in_nmi() (preempt_count() & NMI_MASK)
 
-#if defined(CONFIG_PREEMPT)
-# define PREEMPT_INATOMIC_BASE kernel_locked()
-# define PREEMPT_CHECK_OFFSET 1
-#else
-# define PREEMPT_INATOMIC_BASE 0
-# define PREEMPT_CHECK_OFFSET 0
-#endif
-
 /*
  * Are we running in atomic context? WARNING: this macro cannot
  * always detect atomic context; in particular, it cannot know about
@@ -107,11 +99,17 @@
  * used in the general case to determine whether sleeping is possible.
  * Do not use in_atomic() in driver code.
  */
-#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_INATOMIC_BASE)
+#define in_atomic() ((preempt_count() & ~PREEMPT_ACTIVE) != 0)
+
+#ifdef CONFIG_PREEMPT
+# define PREEMPT_CHECK_OFFSET 1
+#else
+# define PREEMPT_CHECK_OFFSET 0
+#endif
 
 /*
  * Check whether we were atomic before we did preempt_disable():
- * (used by the scheduler, *after* releasing the kernel lock)
+ * (used by the scheduler)
  */
 #define in_atomic_preempt_off() \
 		((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_CHECK_OFFSET)
diff --git a/kernel/sched.c b/kernel/sched.c
index b859b1e..d5034c3 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5639,6 +5639,8 @@ out:
 asmlinkage void __sched preempt_schedule(void)
 {
 	struct thread_info *ti = current_thread_info();
+	struct task_struct *task = current;
+	int saved_lock_depth;
 
 	/*
 	 * If there is a non-zero preempt_count or interrupts are disabled,
@@ -5649,7 +5651,16 @@ asmlinkage void __sched preempt_schedule(void)
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesn't
+		 * auto-release the semaphore:
+		 */
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 		schedule();
+		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -5670,15 +5681,26 @@ EXPORT_SYMBOL(preempt_schedule);
 asmlinkage void __sched preempt_schedule_irq(void)
 {
 	struct thread_info *ti = current_thread_info();
+	struct task_struct *task = current;
+	int saved_lock_depth;
 
 	/* Catch callers which need to be fixed */
 	BUG_ON(ti->preempt_count || !irqs_disabled());
 
 	do {
 		add_preempt_count(PREEMPT_ACTIVE);
+
+		/*
+		 * We keep the big kernel semaphore locked, but we
+		 * clear ->lock_depth so that schedule() doesn't
+		 * auto-release the semaphore:
+		 */
+		saved_lock_depth = task->lock_depth;
+		task->lock_depth = -1;
 		local_irq_enable();
 		schedule();
 		local_irq_disable();
+		task->lock_depth = saved_lock_depth;
 		sub_preempt_count(PREEMPT_ACTIVE);
 
 		/*
@@ -7058,11 +7080,8 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 
 	/* Set the preempt count _outside_ the spinlocks! */
-#if defined(CONFIG_PREEMPT)
-	task_thread_info(idle)->preempt_count = (idle->lock_depth >= 0);
-#else
 	task_thread_info(idle)->preempt_count = 0;
-#endif
+
 	/*
 	 * The idle tasks have their own, simple scheduling class:
 	 */
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 5354922..2c9b548 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -14,107 +14,56 @@
 #include <trace/events/bkl.h>
 
 /*
- * The 'big kernel lock'
+ * The 'big kernel semaphore'
  *
- * This spinlock is taken and released recursively by lock_kernel()
+ * This mutex is taken and released recursively by lock_kernel()
  * and unlock_kernel(). It is transparently dropped and reacquired
  * over schedule(). It is used to protect legacy code that hasn't
  * been migrated to a proper locking design yet.
  *
+ * Note: code locked by this semaphore will only be serialized against
+ * other code using the same locking facility. The code guarantees that
+ * the task remains on the same CPU.
+ *
  * Don't use in new code.
  */
-static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(kernel_flag);
-
+static struct semaphore kernel_sem;
 
 /*
- * Acquire/release the underlying lock from the scheduler.
+ * Re-acquire the kernel semaphore.
  *
- * This is called with preemption disabled, and should
- * return an error value if it cannot get the lock and
- * TIF_NEED_RESCHED gets set.
+ * This function is called with preemption off.
  *
- * If it successfully gets the lock, it should increment
- * the preemption count like any spinlock does.
- *
- * (This works on UP too - do_raw_spin_trylock will never
- * return false in that case)
+ * We are executing in schedule() so the code must be extremely careful
+ * about recursion, both due to the down() and due to the enabling of
+ * preemption. schedule() will re-check the preemption flag after
+ * reacquiring the semaphore.
  */
 int __lockfunc __reacquire_kernel_lock(void)
 {
-	while (!do_raw_spin_trylock(&kernel_flag)) {
-		if (need_resched())
-			return -EAGAIN;
-		cpu_relax();
-	}
-	preempt_disable();
-	return 0;
-}
+	int saved_lock_depth = current->lock_depth;
 
-void __lockfunc __release_kernel_lock(void)
-{
-	do_raw_spin_unlock(&kernel_flag);
-	__preempt_enable_no_resched();
-}
+	BUG_ON(saved_lock_depth < 0);
 
-/*
- * These are the BKL spinlocks - we try to be polite about preemption.
- * If SMP is not on (ie UP preemption), this all goes away because the
- * do_raw_spin_trylock() will always succeed.
- */
-#ifdef CONFIG_PREEMPT
-static inline void __lock_kernel(void)
-{
-	preempt_disable();
-	if (unlikely(!do_raw_spin_trylock(&kernel_flag))) {
-		/*
-		 * If preemption was disabled even before this
-		 * was called, there's nothing we can be polite
-		 * about - just spin.
-		 */
-		if (preempt_count() > 1) {
-			do_raw_spin_lock(&kernel_flag);
-			return;
-		}
+	current->lock_depth = -1;
+	local_irq_enable();
 
-		/*
-		 * Otherwise, let's wait for the kernel lock
-		 * with preemption enabled..
-		 */
-		do {
-			preempt_enable();
-			while (raw_spin_is_locked(&kernel_flag))
-				cpu_relax();
-			preempt_disable();
-		} while (!do_raw_spin_trylock(&kernel_flag));
-	}
-}
+	down(&kernel_sem);
 
-#else
+	preempt_disable();
+	local_irq_disable();
+	current->lock_depth = saved_lock_depth;
 
-/*
- * Non-preemption case - just get the spinlock
- */
-static inline void __lock_kernel(void)
-{
-	do_raw_spin_lock(&kernel_flag);
+	return 0;
 }
-#endif
 
-static inline void __unlock_kernel(void)
+void __lockfunc __release_kernel_lock(void)
 {
-	/*
-	 * the BKL is not covered by lockdep, so we open-code the
-	 * unlocking sequence (and thus avoid the dep-chain ops):
-	 */
-	do_raw_spin_unlock(&kernel_flag);
-	preempt_enable();
+	up(&kernel_sem);
 }
 
 /*
- * Getting the big kernel lock.
- *
- * This cannot happen asynchronously, so we only need to
- * worry about other CPU's.
+ * Getting the big kernel semaphore.
  */
 void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 {
@@ -124,7 +73,10 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 
 	if (likely(!depth)) {
 		might_sleep();
-		__lock_kernel();
+		/*
+		 * No recursion worries - we set up lock_depth _after_
+		 */
+		down(&kernel_sem);
 	}
 	current->lock_depth = depth;
 }
@@ -132,8 +84,9 @@ void __lockfunc _lock_kernel(const char *func, const char *file, int line)
 void __lockfunc _unlock_kernel(const char *func, const char *file, int line)
 {
 	BUG_ON(current->lock_depth < 0);
+
 	if (likely(--current->lock_depth < 0))
-		__unlock_kernel();
+		up(&kernel_sem);
 
 	trace_unlock_kernel(func, file, line);
 }
-- 
1.7.1.1
