From 332fa27b9ddc497f6e629cb92fc8339706fe5b12 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra <peterz@infradead.org>
Date: Fri, 3 Jul 2009 08:44:51 -0500
Subject: [PATCH] powerpc: mmu gather and tlb fixes
commit e7e7fcb92fc6ef2cdb8cc070e9c0d26f03f7b8eb in tip.
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com>
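---

Sketch, not part of the commit: the HAVE_ARCH_MMU_GATHER hook added to
asm/tlb.h below only makes sense together with the generic mmu_gather
rework from the companion tip/-rt patches, which are not included here.
That generic side is assumed to embed and initialise the arch-private
part roughly as follows; the "arch" field name is inferred from the
tlb->arch.batch uses in arch/powerpc/mm/pgtable.c, and
tlb_gather_init_arch() is a hypothetical helper used only for
illustration.

  struct mmu_gather {
          struct mm_struct        *mm;
          /* ... generic page batching fields ... */
  #ifdef HAVE_ARCH_MMU_GATHER
          struct arch_mmu_gather  arch;   /* powerpc: PTE freelist batch */
  #endif
  };

  static inline void tlb_gather_init_arch(struct mmu_gather *tlb)
  {
  #ifdef HAVE_ARCH_MMU_GATHER
          /* every gather starts with an empty freelist batch */
          tlb->arch = ARCH_MMU_GATHER_INIT;
  #endif
  }

With the batch carried in the gather itself, pgtable_free_tlb() queues
page tables on tlb->arch.batch and tlb_flush() drains them through
pte_free_finish(tlb), so nothing relies on the old per-CPU
pte_freelist_cur variable, which was only safe while tlb_gather_mmu()
kept preemption disabled.
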
diff --git a/arch/powerpc/include/asm/pgalloc.h b/arch/powerpc/include/asm/pgalloc.h
index abe8532..df1b4cb 100644
--- a/arch/powerpc/include/asm/pgalloc.h
+++ b/arch/powerpc/include/asm/pgalloc.h
@@ -32,13 +32,13 @@ static inline void pte_free(struct mm_struct *mm, pgtable_t ptepage)
#ifdef CONFIG_SMP
extern void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift);
-extern void pte_free_finish(void);
+extern void pte_free_finish(struct mmu_gather *tlb);
#else /* CONFIG_SMP */
static inline void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
pgtable_free(table, shift);
}
-static inline void pte_free_finish(void) { }
+static inline void pte_free_finish(struct mmu_gather *tlb) { }
#endif /* !CONFIG_SMP */
static inline void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage,
diff --git a/arch/powerpc/include/asm/pgtable-ppc64.h b/arch/powerpc/include/asm/pgtable-ppc64.h
index 4986504..3b29c95 100644
--- a/arch/powerpc/include/asm/pgtable-ppc64.h
+++ b/arch/powerpc/include/asm/pgtable-ppc64.h
@@ -230,8 +230,15 @@ static inline unsigned long pte_update(struct mm_struct *mm,
assert_pte_locked(mm, addr);
#ifdef CONFIG_PPC_STD_MMU_64
- if (old & _PAGE_HASHPTE)
+ if (old & _PAGE_HASHPTE) {
+#ifdef CONFIG_PREEMPT_RT
+ preempt_disable();
+#endif
hpte_need_flush(mm, addr, ptep, old, huge);
+#ifdef CONFIG_PREEMPT_RT
+ preempt_enable();
+#endif
+ }
#endif
return old;
diff --git a/arch/powerpc/include/asm/tlb.h b/arch/powerpc/include/asm/tlb.h
index e2b428b..8f0ed7a 100644
--- a/arch/powerpc/include/asm/tlb.h
+++ b/arch/powerpc/include/asm/tlb.h
@@ -28,6 +28,16 @@
#define tlb_start_vma(tlb, vma) do { } while (0)
#define tlb_end_vma(tlb, vma) do { } while (0)
+#define HAVE_ARCH_MMU_GATHER 1
+
+struct pte_freelist_batch;
+
+struct arch_mmu_gather {
+ struct pte_freelist_batch *batch;
+};
+
+#define ARCH_MMU_GATHER_INIT (struct arch_mmu_gather){ .batch = NULL, }
+
extern void tlb_flush(struct mmu_gather *tlb);
/* Get the generic bits... */
diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h
index d50a380..b594942 100644
--- a/arch/powerpc/include/asm/tlbflush.h
+++ b/arch/powerpc/include/asm/tlbflush.h
@@ -108,18 +108,25 @@ extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
static inline void arch_enter_lazy_mmu_mode(void)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
batch->active = 1;
+
+ put_cpu_var(ppc64_tlb_batch);
}
static inline void arch_leave_lazy_mmu_mode(void)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
+
+ if (batch->active) {
+ if (batch->index) {
+ __flush_tlb_pending(batch);
+ }
+ batch->active = 0;
+ }
- if (batch->index)
- __flush_tlb_pending(batch);
- batch->active = 0;
+ put_cpu_var(ppc64_tlb_batch);
}
#define arch_flush_lazy_mmu_mode() do {} while (0)
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 7b816da..3ec4ca4 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -304,6 +304,10 @@ struct task_struct *__switch_to(struct task_struct *prev,
struct thread_struct *new_thread, *old_thread;
unsigned long flags;
struct task_struct *last;
+#if defined(CONFIG_PPC64) && defined(CONFIG_PREEMPT_RT)
+ struct ppc64_tlb_batch *batch;
+ int hadbatch = 0;
+#endif
#ifdef CONFIG_SMP
/* avoid complexity of lazy save/restore of fpu
@@ -396,6 +400,17 @@ struct task_struct *__switch_to(struct task_struct *prev,
old_thread->accum_tb += (current_tb - start_tb);
new_thread->start_tb = current_tb;
}
+
+#ifdef CONFIG_PREEMPT_RT
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ if (batch->active) {
+ hadbatch = 1;
+ if (batch->index) {
+ __flush_tlb_pending(batch);
+ }
+ batch->active = 0;
+ }
+#endif /* #ifdef CONFIG_PREEMPT_RT */
#endif
local_irq_save(flags);
@@ -414,6 +429,13 @@ struct task_struct *__switch_to(struct task_struct *prev,
local_irq_restore(flags);
+#if defined(CONFIG_PPC64) && defined(CONFIG_PREEMPT_RT)
+ if (hadbatch) {
+ batch = &__get_cpu_var(ppc64_tlb_batch);
+ batch->active = 1;
+ }
+#endif
+
return last;
}
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index 99df697..4243a84 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -32,8 +32,6 @@
#include "mmu_decl.h"
-DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
-
#ifdef CONFIG_SMP
/*
@@ -42,7 +40,6 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers);
* freeing a page table page that is being walked without locks
*/
-static DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur);
static unsigned long pte_freelist_forced_free;
struct pte_freelist_batch
@@ -97,12 +94,12 @@ static void pte_free_submit(struct pte_freelist_batch *batch)
void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
{
- /* This is safe since tlb_gather_mmu has disabled preemption */
- struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+ struct pte_freelist_batch **batchp = &tlb->arch.batch;
unsigned long pgf;
- if (atomic_read(&tlb->mm->mm_users) < 2 ||
- cpumask_equal(mm_cpumask(tlb->mm), cpumask_of(smp_processor_id()))){
+ /* CHECKME */
+
+ if (atomic_read(&tlb->mm->mm_users) < 2) {
pgtable_free(table, shift);
return;
}
@@ -124,15 +121,14 @@ void pgtable_free_tlb(struct mmu_gather *tlb, void *table, unsigned shift)
}
}
-void pte_free_finish(void)
+void pte_free_finish(struct mmu_gather *tlb)
{
- /* This is safe since tlb_gather_mmu has disabled preemption */
- struct pte_freelist_batch **batchp = &__get_cpu_var(pte_freelist_cur);
+ struct pte_freelist_batch **batchp = &tlb->arch.batch;
- if (*batchp == NULL)
- return;
- pte_free_submit(*batchp);
- *batchp = NULL;
+ if (*batchp) {
+ pte_free_submit(*batchp);
+ *batchp = NULL;
+ }
}
#endif /* CONFIG_SMP */
diff --git a/arch/powerpc/mm/tlb_hash32.c b/arch/powerpc/mm/tlb_hash32.c
index 8aaa8b7..3b0b3d8 100644
--- a/arch/powerpc/mm/tlb_hash32.c
+++ b/arch/powerpc/mm/tlb_hash32.c
@@ -73,7 +73,7 @@ void tlb_flush(struct mmu_gather *tlb)
}
/* Push out batch of freed page tables */
- pte_free_finish();
+ pte_free_finish(tlb);
}
/*
diff --git a/arch/powerpc/mm/tlb_hash64.c b/arch/powerpc/mm/tlb_hash64.c
index 1ec0657..f290f6c 100644
--- a/arch/powerpc/mm/tlb_hash64.c
+++ b/arch/powerpc/mm/tlb_hash64.c
@@ -30,6 +30,7 @@
#include <asm/tlbflush.h>
#include <asm/tlb.h>
#include <asm/bug.h>
+#include <asm/machdep.h>
DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
@@ -44,7 +45,7 @@ DEFINE_PER_CPU(struct ppc64_tlb_batch, ppc64_tlb_batch);
void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, unsigned long pte, int huge)
{
- struct ppc64_tlb_batch *batch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *batch = &get_cpu_var(ppc64_tlb_batch);
unsigned long vsid, vaddr;
unsigned int psize;
int ssize;
@@ -99,6 +100,7 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
*/
if (!batch->active) {
flush_hash_page(vaddr, rpte, psize, ssize, 0);
+ put_cpu_var(ppc64_tlb_batch);
return;
}
@@ -125,8 +127,22 @@ void hpte_need_flush(struct mm_struct *mm, unsigned long addr,
batch->pte[i] = rpte;
batch->vaddr[i] = vaddr;
batch->index = ++i;
+
+#ifdef CONFIG_PREEMPT_RT
+ /*
+ * Flushing the TLB needs expensive hypervisor call(s) on celleb,
+ * so always flush the batch here on RT to reduce scheduling latency.
+ */
+ if (machine_is(celleb)) {
+ __flush_tlb_pending(batch);
+ put_cpu_var(ppc64_tlb_batch);
+ return;
+ }
+#endif /* CONFIG_PREEMPT_RT */
+
if (i >= PPC64_TLB_BATCH_NR)
__flush_tlb_pending(batch);
+ put_cpu_var(ppc64_tlb_batch);
}
/*
@@ -155,7 +171,7 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch)
void tlb_flush(struct mmu_gather *tlb)
{
- struct ppc64_tlb_batch *tlbbatch = &__get_cpu_var(ppc64_tlb_batch);
+ struct ppc64_tlb_batch *tlbbatch = &get_cpu_var(ppc64_tlb_batch);
/* If there's a TLB batch pending, then we must flush it because the
* pages are going to be freed and we really don't want to have a CPU
@@ -165,7 +181,8 @@ void tlb_flush(struct mmu_gather *tlb)
__flush_tlb_pending(tlbbatch);
/* Push out batch of freed page tables */
- pte_free_finish();
+ put_cpu_var(ppc64_tlb_batch);
+ pte_free_finish(tlb);
}
/**
diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c
index a65f1d0..c242127 100644
--- a/arch/powerpc/mm/tlb_nohash.c
+++ b/arch/powerpc/mm/tlb_nohash.c
@@ -300,7 +300,7 @@ void tlb_flush(struct mmu_gather *tlb)
flush_tlb_mm(tlb->mm);
/* Push out batch of freed page tables */
- pte_free_finish();
+ pte_free_finish(tlb);
}
/*
diff --git a/arch/powerpc/platforms/pseries/iommu.c b/arch/powerpc/platforms/pseries/iommu.c
index 1a0000a..902987d 100644
--- a/arch/powerpc/platforms/pseries/iommu.c
+++ b/arch/powerpc/platforms/pseries/iommu.c
@@ -140,7 +140,7 @@ static int tce_build_pSeriesLP(struct iommu_table *tbl, long tcenum,
return ret;
}
-static DEFINE_PER_CPU(u64 *, tce_page) = NULL;
+static DEFINE_PER_CPU_LOCKED(u64 *, tce_page) = NULL;
static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long npages, unsigned long uaddr,
@@ -154,13 +154,14 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
long l, limit;
long tcenum_start = tcenum, npages_start = npages;
int ret = 0;
+ int cpu;
if (npages == 1) {
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
}
- tcep = __get_cpu_var(tce_page);
+ tcep = get_cpu_var_locked(tce_page, &cpu);
/* This is safe to do since interrupts are off when we're called
* from iommu_alloc{,_sg}()
@@ -169,10 +170,11 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
tcep = (u64 *)__get_free_page(GFP_ATOMIC);
/* If allocation fails, fall back to the loop implementation */
if (!tcep) {
+ put_cpu_var_locked(tce_page, cpu);
return tce_build_pSeriesLP(tbl, tcenum, npages, uaddr,
direction, attrs);
}
- __get_cpu_var(tce_page) = tcep;
+ per_cpu_var_locked(tce_page, cpu) = tcep;
}
rpn = (virt_to_abs(uaddr)) >> TCE_SHIFT;
@@ -216,6 +218,7 @@ static int tce_buildmulti_pSeriesLP(struct iommu_table *tbl, long tcenum,
printk("\ttce[0] val = 0x%llx\n", tcep[0]);
show_stack(current, (unsigned long *)__get_SP());
}
+ put_cpu_var_locked(tce_page, cpu);
return ret;
}
--
1.7.1.1
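
Note, not part of the commit: most of the mechanical changes above
replace __get_cpu_var() with the get_cpu_var()/put_cpu_var() pair.  On
PREEMPT_RT the affected paths can run preemptibly, and __get_cpu_var()
assumes the caller already cannot migrate, whereas get_cpu_var()
disables preemption for the duration of the access and put_cpu_var()
re-enables it.  A minimal sketch of the pattern, using a hypothetical
per-CPU variable (demo_count is not part of this patch):

  #include <linux/percpu.h>

  static DEFINE_PER_CPU(unsigned long, demo_count);

  static void demo_count_event(void)
  {
          unsigned long *cnt;

          /* disables preemption, so we cannot migrate off this CPU */
          cnt = &get_cpu_var(demo_count);
          (*cnt)++;
          /* re-enables preemption */
          put_cpu_var(demo_count);
  }

The pseries iommu change follows the same idea with the -rt
DEFINE_PER_CPU_LOCKED()/get_cpu_var_locked() variants, which pair each
per-CPU slot with a lock so the tce_page buffer can be used safely from
a preemptible section.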