patches/mm-perform-lru_add_drain_all-remotely.patch - pub/scm/linux/kernel/git/paulg/4.9-rt-patches - Git at Google

 From: Luiz Capitulino <lcapitulino@redhat.com>
 Date: Fri, 27 May 2016 15:03:28 +0200
 Subject: [PATCH] mm: perform lru_add_drain_all() remotely

 lru_add_drain_all() works by scheduling lru_add_drain_cpu() to run
 on all CPUs that have non-empty LRU pagevecs and then waiting for
 the scheduled work to complete. However, workqueue threads may never
 have the chance to run on a CPU that's running a SCHED_FIFO task.
 This causes lru_add_drain_all() to block forever.

 This commit solves this problem by changing lru_add_drain_all()
 to drain the LRU pagevecs of remote CPUs. This is done by grabbing
 swapvec_lock and calling lru_add_drain_cpu().

 PS: This is based on an idea and initial implementation by
     Rik van Riel.

 Signed-off-by: Rik van Riel <riel@redhat.com>
 Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
 Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
 ---
  mm/swap.c |   42 ++++++++++++++++++++++++++++++++----------
  1 file changed, 32 insertions(+), 10 deletions(-)

 --- a/mm/swap.c
 +++ b/mm/swap.c
 @@ -597,9 +597,15 @@ void lru_add_drain_cpu(int cpu)
  		unsigned long flags;

  		/* No harm done if a racing interrupt already did this */
 +#ifdef CONFIG_PREEMPT_RT_BASE
 +		local_lock_irqsave_on(rotate_lock, flags, cpu);
 +		pagevec_move_tail(pvec);
 +		local_unlock_irqrestore_on(rotate_lock, flags, cpu);
 +#else
  		local_lock_irqsave(rotate_lock, flags);
  		pagevec_move_tail(pvec);
  		local_unlock_irqrestore(rotate_lock, flags);
 +#endif
  	}

  	pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
 @@ -667,12 +673,15 @@ void lru_add_drain(void)
  	local_unlock_cpu(swapvec_lock);
  }

 -static void lru_add_drain_per_cpu(struct work_struct *dummy)
 +#ifdef CONFIG_PREEMPT_RT_BASE
 +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
  {
 -	lru_add_drain();
 +	local_lock_on(swapvec_lock, cpu);
 +	lru_add_drain_cpu(cpu);
 +	local_unlock_on(swapvec_lock, cpu);
  }

 -static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 +#else

  /*
   * lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
 @@ -692,6 +701,22 @@ static int __init lru_init(void)
  }
  early_initcall(lru_init);

 +static void lru_add_drain_per_cpu(struct work_struct *dummy)
 +{
 +	lru_add_drain();
 +}
 +
 +static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
 +static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
 +{
 +	struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 +
 +	INIT_WORK(work, lru_add_drain_per_cpu);
 +	queue_work_on(cpu, lru_add_drain_wq, work);
 +	cpumask_set_cpu(cpu, has_work);
 +}
 +#endif
 +
  void lru_add_drain_all(void)
  {
  	static DEFINE_MUTEX(lock);
 @@ -703,21 +728,18 @@ void lru_add_drain_all(void)
  	cpumask_clear(&has_work);

  	for_each_online_cpu(cpu) {
 -		struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
 -
  		if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
  		    pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
  		    pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
  		    pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) ||
 -		    need_activate_page_drain(cpu)) {
 -			INIT_WORK(work, lru_add_drain_per_cpu);
 -			queue_work_on(cpu, lru_add_drain_wq, work);
 -			cpumask_set_cpu(cpu, &has_work);
 -		}
 +		    need_activate_page_drain(cpu))
 +			remote_lru_add_drain(cpu, &has_work);
  	}

 +#ifndef CONFIG_PREEMPT_RT_BASE
  	for_each_cpu(cpu, &has_work)
  		flush_work(&per_cpu(lru_add_drain_work, cpu));
 +#endif

  	put_online_cpus();
  	mutex_unlock(&lock);
	From: Luiz Capitulino <lcapitulino@redhat.com>
	Date: Fri, 27 May 2016 15:03:28 +0200
	Subject: [PATCH] mm: perform lru_add_drain_all() remotely

	lru_add_drain_all() works by scheduling lru_add_drain_cpu() to run
	on all CPUs that have non-empty LRU pagevecs and then waiting for
	the scheduled work to complete. However, workqueue threads may never
	have the chance to run on a CPU that's running a SCHED_FIFO task.
	This causes lru_add_drain_all() to block forever.

	This commit solves this problem by changing lru_add_drain_all()
	to drain the LRU pagevecs of remote CPUs. This is done by grabbing
	swapvec_lock and calling lru_add_drain_cpu().

	PS: This is based on an idea and initial implementation by
	Rik van Riel.

	Signed-off-by: Rik van Riel <riel@redhat.com>
	Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
	Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
	---
	mm/swap.c \| 42 ++++++++++++++++++++++++++++++++----------
	1 file changed, 32 insertions(+), 10 deletions(-)

	--- a/mm/swap.c
	+++ b/mm/swap.c
	@@ -597,9 +597,15 @@ void lru_add_drain_cpu(int cpu)
	unsigned long flags;

	/* No harm done if a racing interrupt already did this */
	+#ifdef CONFIG_PREEMPT_RT_BASE
	+ local_lock_irqsave_on(rotate_lock, flags, cpu);
	+ pagevec_move_tail(pvec);
	+ local_unlock_irqrestore_on(rotate_lock, flags, cpu);
	+#else
	local_lock_irqsave(rotate_lock, flags);
	pagevec_move_tail(pvec);
	local_unlock_irqrestore(rotate_lock, flags);
	+#endif
	}

	pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
	@@ -667,12 +673,15 @@ void lru_add_drain(void)
	local_unlock_cpu(swapvec_lock);
	}

	-static void lru_add_drain_per_cpu(struct work_struct *dummy)
	+#ifdef CONFIG_PREEMPT_RT_BASE
	+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
	{
	- lru_add_drain();
	+ local_lock_on(swapvec_lock, cpu);
	+ lru_add_drain_cpu(cpu);
	+ local_unlock_on(swapvec_lock, cpu);
	}

	-static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
	+#else

	/*
	* lru_add_drain_wq is used to do lru_add_drain_all() from a WQ_MEM_RECLAIM
	@@ -692,6 +701,22 @@ static int __init lru_init(void)
	}
	early_initcall(lru_init);

	+static void lru_add_drain_per_cpu(struct work_struct *dummy)
	+{
	+ lru_add_drain();
	+}
	+
	+static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
	+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
	+{
	+ struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
	+
	+ INIT_WORK(work, lru_add_drain_per_cpu);
	+ queue_work_on(cpu, lru_add_drain_wq, work);
	+ cpumask_set_cpu(cpu, has_work);
	+}
	+#endif
	+
	void lru_add_drain_all(void)
	{
	static DEFINE_MUTEX(lock);
	@@ -703,21 +728,18 @@ void lru_add_drain_all(void)
	cpumask_clear(&has_work);

	for_each_online_cpu(cpu) {
	- struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
	-
	if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) \|\|
	pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) \|\|
	pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) \|\|
	pagevec_count(&per_cpu(lru_deactivate_pvecs, cpu)) \|\|
	- need_activate_page_drain(cpu)) {
	- INIT_WORK(work, lru_add_drain_per_cpu);
	- queue_work_on(cpu, lru_add_drain_wq, work);
	- cpumask_set_cpu(cpu, &has_work);
	- }
	+ need_activate_page_drain(cpu))
	+ remote_lru_add_drain(cpu, &has_work);
	}

	+#ifndef CONFIG_PREEMPT_RT_BASE
	for_each_cpu(cpu, &has_work)
	flush_work(&per_cpu(lru_add_drain_work, cpu));
	+#endif

	put_online_cpus();
	mutex_unlock(&lock);