Subject: sched: Postpone actual migration disable to schedule
From: Steven Rostedt <rostedt@goodmis.org>
Date: Tue, 27 Sep 2011 08:40:23 -0400
The migrate_disable() call can add a fair bit of overhead to the RT kernel,
as changing a task's affinity is expensive to do at every lock encountered.
Since a running task cannot migrate anyway, the actual disabling of
migration does not need to occur until the task is about to schedule out.

In most cases a task that disables migration will enable it again before it
ever schedules, so this change improves performance tremendously.
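
To illustrate the idea (not kernel code): below is a minimal, self-contained
user-space sketch of the deferred scheme. The task structure and the names
my_migrate_disable(), my_migrate_enable(), my_schedule() and affinity_updated
are hypothetical stand-ins, not kernel APIs; the point is only that the
expensive affinity update is deferred from migrate_disable() to the moment
the task is about to schedule out, and is skipped entirely in the common
disable/enable-without-scheduling case.

	/* Hypothetical sketch of deferred migrate-disable, not kernel code. */
	#include <stdio.h>

	struct task {
		int migrate_disable;	/* nesting count */
		int affinity_updated;	/* stands in for MIGRATE_DISABLE_SET_AFFIN */
	};

	static void my_migrate_disable(struct task *p)
	{
		p->migrate_disable++;	/* cheap: no affinity change here */
	}

	static void my_migrate_enable(struct task *p)
	{
		if (--p->migrate_disable)
			return;
		if (p->affinity_updated) {
			/* restore the original affinity (the expensive path) */
			printf("restoring affinity\n");
			p->affinity_updated = 0;
		}
	}

	static void my_schedule(struct task *p)
	{
		/* only now, when the task is about to schedule out, pin it */
		if (p->migrate_disable && !p->affinity_updated) {
			printf("pinning to current CPU\n");
			p->affinity_updated = 1;
		}
	}

	int main(void)
	{
		struct task t = { 0, 0 };

		/* common case: disable/enable with no schedule - no affinity work */
		my_migrate_disable(&t);
		my_migrate_enable(&t);

		/* rare case: the task schedules out while migration is disabled */
		my_migrate_disable(&t);
		my_schedule(&t);
		my_migrate_enable(&t);
		return 0;
	}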
[ Frank Rowand: UP compile fix ]
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Clark Williams <williams@redhat.com>
Link: http://lkml.kernel.org/r/20110927124422.779693167@goodmis.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
---
kernel/sched/core.c | 251 +++++++++++++++++++++++++++-------------------------
1 file changed, 132 insertions(+), 119 deletions(-)
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2850,6 +2850,135 @@ static inline void schedule_debug(struct
schedstat_inc(this_rq(), sched_count);
}
+#if defined(CONFIG_PREEMPT_RT_FULL) && defined(CONFIG_SMP)
+#define MIGRATE_DISABLE_SET_AFFIN (1<<30) /* Can't make a negative */
+#define migrate_disabled_updated(p) ((p)->migrate_disable & MIGRATE_DISABLE_SET_AFFIN)
+#define migrate_disable_count(p) ((p)->migrate_disable & ~MIGRATE_DISABLE_SET_AFFIN)
+
+static inline void update_migrate_disable(struct task_struct *p)
+{
+ const struct cpumask *mask;
+
+ if (likely(!p->migrate_disable))
+ return;
+
+ /* Did we already update affinity? */
+ if (unlikely(migrate_disabled_updated(p)))
+ return;
+
+ /*
+ * Since this is always current we can get away with only locking
+ * rq->lock, the ->cpus_allowed value can normally only be changed
+ * while holding both p->pi_lock and rq->lock, but seeing that this
+ * is current, we cannot actually be waking up, so all code that
+ * relies on serialization against p->pi_lock is out of scope.
+ *
+ * Having rq->lock serializes us against things like
+ * set_cpus_allowed_ptr() that can still happen concurrently.
+ */
+ mask = tsk_cpus_allowed(p);
+
+ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
+
+ if (!cpumask_equal(&p->cpus_allowed, mask)) {
+ if (p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, mask);
+ p->nr_cpus_allowed = cpumask_weight(mask);
+
+ /* Let migrate_enable know to fix things back up */
+ p->migrate_disable |= MIGRATE_DISABLE_SET_AFFIN;
+ }
+}
+
+void migrate_disable(void)
+{
+ struct task_struct *p = current;
+
+ if (in_atomic()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic++;
+#endif
+ return;
+ }
+
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(p->migrate_disable_atomic);
+#endif
+
+ preempt_disable();
+ if (p->migrate_disable) {
+ p->migrate_disable++;
+ preempt_enable();
+ return;
+ }
+
+ pin_current_cpu();
+ p->migrate_disable = 1;
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_disable);
+
+void migrate_enable(void)
+{
+ struct task_struct *p = current;
+ const struct cpumask *mask;
+ unsigned long flags;
+ struct rq *rq;
+
+ if (in_atomic()) {
+#ifdef CONFIG_SCHED_DEBUG
+ p->migrate_disable_atomic--;
+#endif
+ return;
+ }
+
+#ifdef CONFIG_SCHED_DEBUG
+ WARN_ON_ONCE(p->migrate_disable_atomic);
+#endif
+ WARN_ON_ONCE(p->migrate_disable <= 0);
+
+ preempt_disable();
+ if (migrate_disable_count(p) > 1) {
+ p->migrate_disable--;
+ preempt_enable();
+ return;
+ }
+
+ if (unlikely(migrate_disabled_updated(p))) {
+ /*
+ * See comment in update_migrate_disable() about locking.
+ */
+ rq = this_rq();
+ raw_spin_lock_irqsave(&rq->lock, flags);
+ mask = tsk_cpus_allowed(p);
+ /*
+ * Clearing migrate_disable causes tsk_cpus_allowed to
+ * show the tasks original cpu affinity.
+ */
+ p->migrate_disable = 0;
+
+ WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
+
+ if (unlikely(!cpumask_equal(&p->cpus_allowed, mask))) {
+ /* Get the mask now that migration is enabled */
+ mask = tsk_cpus_allowed(p);
+ if (p->sched_class->set_cpus_allowed)
+ p->sched_class->set_cpus_allowed(p, mask);
+ p->nr_cpus_allowed = cpumask_weight(mask);
+ }
+ raw_spin_unlock_irqrestore(&rq->lock, flags);
+ } else
+ p->migrate_disable = 0;
+
+ unpin_current_cpu();
+ preempt_enable();
+}
+EXPORT_SYMBOL(migrate_enable);
+#else
+static inline void update_migrate_disable(struct task_struct *p) { }
+#define migrate_disabled_updated(p) 0
+#endif
+
static void put_prev_task(struct rq *rq, struct task_struct *prev)
{
if (prev->on_rq || rq->skip_clock_update < 0)
@@ -2943,6 +3072,8 @@ need_resched:
raw_spin_lock_irq(&rq->lock);
+ update_migrate_disable(prev);
+
switch_count = &prev->nivcsw;
if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
if (unlikely(signal_pending_state(prev->state, prev))) {
@@ -4736,7 +4867,7 @@ void __cpuinit init_idle(struct task_str
#ifdef CONFIG_SMP
void do_set_cpus_allowed(struct task_struct *p, const struct cpumask *new_mask)
{
- if (!__migrate_disabled(p)) {
+ if (!migrate_disabled_updated(p)) {
if (p->sched_class && p->sched_class->set_cpus_allowed)
p->sched_class->set_cpus_allowed(p, new_mask);
p->nr_cpus_allowed = cpumask_weight(new_mask);
@@ -4811,124 +4942,6 @@ out:
}
EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
-#ifdef CONFIG_PREEMPT_RT_FULL
-void migrate_disable(void)
-{
- struct task_struct *p = current;
- const struct cpumask *mask;
- unsigned long flags;
- struct rq *rq;
-
- if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
- p->migrate_disable_atomic++;
-#endif
- return;
- }
-
-#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
-#endif
-
- preempt_disable();
- if (p->migrate_disable) {
- p->migrate_disable++;
- preempt_enable();
- return;
- }
-
- pin_current_cpu();
- if (unlikely(!scheduler_running)) {
- p->migrate_disable = 1;
- preempt_enable();
- return;
- }
-
- /*
- * Since this is always current we can get away with only locking
- * rq->lock, the ->cpus_allowed value can normally only be changed
- * while holding both p->pi_lock and rq->lock, but seeing that this
- * it current, we cannot actually be waking up, so all code that
- * relies on serialization against p->pi_lock is out of scope.
- *
- * Taking rq->lock serializes us against things like
- * set_cpus_allowed_ptr() that can still happen concurrently.
- */
- rq = this_rq();
- raw_spin_lock_irqsave(&rq->lock, flags);
- p->migrate_disable = 1;
- mask = tsk_cpus_allowed(p);
-
- WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
-
- if (!cpumask_equal(&p->cpus_allowed, mask)) {
- if (p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, mask);
- p->nr_cpus_allowed = cpumask_weight(mask);
- }
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- preempt_enable();
-}
-EXPORT_SYMBOL(migrate_disable);
-
-void migrate_enable(void)
-{
- struct task_struct *p = current;
- const struct cpumask *mask;
- unsigned long flags;
- struct rq *rq;
-
- if (in_atomic()) {
-#ifdef CONFIG_SCHED_DEBUG
- p->migrate_disable_atomic--;
-#endif
- return;
- }
-
-#ifdef CONFIG_SCHED_DEBUG
- WARN_ON_ONCE(p->migrate_disable_atomic);
-#endif
- WARN_ON_ONCE(p->migrate_disable <= 0);
-
- preempt_disable();
- if (p->migrate_disable > 1) {
- p->migrate_disable--;
- preempt_enable();
- return;
- }
-
- if (unlikely(!scheduler_running)) {
- p->migrate_disable = 0;
- unpin_current_cpu();
- preempt_enable();
- return;
- }
-
- /*
- * See comment in migrate_disable().
- */
- rq = this_rq();
- raw_spin_lock_irqsave(&rq->lock, flags);
- mask = tsk_cpus_allowed(p);
- p->migrate_disable = 0;
-
- WARN_ON(!cpumask_test_cpu(smp_processor_id(), mask));
-
- if (!cpumask_equal(&p->cpus_allowed, mask)) {
- /* Get the mask now that migration is enabled */
- mask = tsk_cpus_allowed(p);
- if (p->sched_class->set_cpus_allowed)
- p->sched_class->set_cpus_allowed(p, mask);
- p->nr_cpus_allowed = cpumask_weight(mask);
- }
-
- raw_spin_unlock_irqrestore(&rq->lock, flags);
- unpin_current_cpu();
- preempt_enable();
-}
-EXPORT_SYMBOL(migrate_enable);
-#endif /* CONFIG_PREEMPT_RT_FULL */
-
/*
* Move (not current) task off this cpu, onto dest cpu. We're doing
* this because either it can't run here any more (set_cpus_allowed()