| From: Andy Lutomirski <luto@kernel.org> |
| Date: Tue, 26 Apr 2016 09:39:08 -0700 |
| Subject: x86/mm, sched/core: Uninline switch_mm() |
| |
| commit 69c0319aabba45bcf33178916a2f06967b4adede upstream. |
| |
| switch_mm() is fairly large and has quite a few callers. Moving it
| out of line may also help untangle some headers down the road.
| |
| Signed-off-by: Andy Lutomirski <luto@kernel.org> |
| Reviewed-by: Borislav Petkov <bp@suse.de> |
| Cc: Borislav Petkov <bp@alien8.de> |
| Cc: Linus Torvalds <torvalds@linux-foundation.org> |
| Cc: Peter Zijlstra <peterz@infradead.org> |
| Cc: Thomas Gleixner <tglx@linutronix.de> |
| Link: http://lkml.kernel.org/r/54f3367803e7f80b2be62c8a21879aa74b1a5f57.1461688545.git.luto@kernel.org |
| Signed-off-by: Ingo Molnar <mingo@kernel.org> |
| [Hugh Dickins: Backported to 3.2] |
| Signed-off-by: Hugh Dickins <hughd@google.com> |
| Signed-off-by: Ben Hutchings <ben@decadent.org.uk> |
| --- |
| arch/x86/include/asm/mmu_context.h | 72 +----------------------------------- |
| arch/x86/mm/tlb.c | 75 ++++++++++++++++++++++++++++++++++++++ |
| 2 files changed, 77 insertions(+), 70 deletions(-) |
| |
| diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h |
| index babbcd18a7a7..4a5d9b52d4f8 100644 |
| --- a/arch/x86/include/asm/mmu_context.h |
| +++ b/arch/x86/include/asm/mmu_context.h |
| @@ -75,76 +75,8 @@ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) |
| #endif |
| } |
| |
| -static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, |
| - struct task_struct *tsk) |
| -{ |
| - unsigned cpu = smp_processor_id(); |
| - |
| - if (likely(prev != next)) { |
| -#ifdef CONFIG_SMP |
| - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
| - percpu_write(cpu_tlbstate.active_mm, next); |
| -#endif |
| - cpumask_set_cpu(cpu, mm_cpumask(next)); |
| - |
| - /* |
| - * Re-load page tables. |
| - * |
| - * This logic has an ordering constraint: |
| - * |
| - * CPU 0: Write to a PTE for 'next' |
| - * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. |
| - * CPU 1: set bit 1 in next's mm_cpumask |
| - * CPU 1: load from the PTE that CPU 0 writes (implicit) |
| - * |
| - * We need to prevent an outcome in which CPU 1 observes |
| - * the new PTE value and CPU 0 observes bit 1 clear in |
| - * mm_cpumask. (If that occurs, then the IPI will never |
| - * be sent, and CPU 0's TLB will contain a stale entry.) |
| - * |
| - * The bad outcome can occur if either CPU's load is |
| - * reordered before that CPU's store, so both CPUs must |
| - * execute full barriers to prevent this from happening. |
| - * |
| - * Thus, switch_mm needs a full barrier between the |
| - * store to mm_cpumask and any operation that could load |
| - * from next->pgd. TLB fills are special and can happen |
| - * due to instruction fetches or for no reason at all, |
| - * and neither LOCK nor MFENCE orders them. |
| - * Fortunately, load_cr3() is serializing and gives the |
| - * ordering guarantee we need. |
| - * |
| - */ |
| - load_cr3(next->pgd); |
| - |
| - /* stop flush ipis for the previous mm */ |
| - cpumask_clear_cpu(cpu, mm_cpumask(prev)); |
| - |
| - /* |
| - * load the LDT, if the LDT is different: |
| - */ |
| - if (unlikely(prev->context.ldt != next->context.ldt)) |
| - load_mm_ldt(next); |
| - } |
| -#ifdef CONFIG_SMP |
| - else { |
| - percpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
| - BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); |
| - |
| - if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { |
| - /* We were in lazy tlb mode and leave_mm disabled |
| - * tlb flush IPI delivery. We must reload CR3 |
| - * to make sure to use no freed page tables. |
| - * |
| - * As above, load_cr3() is serializing and orders TLB |
| - * fills with respect to the mm_cpumask write. |
| - */ |
| - load_cr3(next->pgd); |
| - load_mm_ldt(next); |
| - } |
| - } |
| -#endif |
| -} |
| +extern void switch_mm(struct mm_struct *prev, struct mm_struct *next, |
| + struct task_struct *tsk); |
| |
| #define activate_mm(prev, next) \ |
| do { \ |
| diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c |
| index 8b731b6b4613..96481dad7532 100644 |
| --- a/arch/x86/mm/tlb.c |
| +++ b/arch/x86/mm/tlb.c |
| @@ -71,6 +71,81 @@ void leave_mm(int cpu) |
| } |
| EXPORT_SYMBOL_GPL(leave_mm); |
| |
| +#endif /* CONFIG_SMP */ |
| + |
| +void switch_mm(struct mm_struct *prev, struct mm_struct *next, |
| + struct task_struct *tsk) |
| +{ |
| + unsigned cpu = smp_processor_id(); |
| + |
| + if (likely(prev != next)) { |
| +#ifdef CONFIG_SMP |
| + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
| + percpu_write(cpu_tlbstate.active_mm, next); |
| +#endif |
| + cpumask_set_cpu(cpu, mm_cpumask(next)); |
| + |
| + /* |
| + * Re-load page tables. |
| + * |
| + * This logic has an ordering constraint: |
| + * |
| + * CPU 0: Write to a PTE for 'next' |
| + * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI. |
| + * CPU 1: set bit 1 in next's mm_cpumask |
| + * CPU 1: load from the PTE that CPU 0 writes (implicit) |
| + * |
| + * We need to prevent an outcome in which CPU 1 observes |
| + * the new PTE value and CPU 0 observes bit 1 clear in |
| + * mm_cpumask. (If that occurs, then the IPI will never |
| + * be sent, and CPU 0's TLB will contain a stale entry.) |
| + * |
| + * The bad outcome can occur if either CPU's load is |
| + * reordered before that CPU's store, so both CPUs must |
| + * execute full barriers to prevent this from happening. |
| + * |
| + * Thus, switch_mm needs a full barrier between the |
| + * store to mm_cpumask and any operation that could load |
| + * from next->pgd. TLB fills are special and can happen |
| + * due to instruction fetches or for no reason at all, |
| + * and neither LOCK nor MFENCE orders them. |
| + * Fortunately, load_cr3() is serializing and gives the |
| + * ordering guarantee we need. |
| + * |
| + */ |
| + load_cr3(next->pgd); |
| + |
| + /* stop flush ipis for the previous mm */ |
| + cpumask_clear_cpu(cpu, mm_cpumask(prev)); |
| + |
| + /* |
| + * load the LDT, if the LDT is different: |
| + */ |
| + if (unlikely(prev->context.ldt != next->context.ldt)) |
| + load_mm_ldt(next); |
| + } |
| +#ifdef CONFIG_SMP |
| + else { |
| + percpu_write(cpu_tlbstate.state, TLBSTATE_OK); |
| + BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next); |
| + |
| + if (!cpumask_test_and_set_cpu(cpu, mm_cpumask(next))) { |
| + /* We were in lazy tlb mode and leave_mm disabled |
| + * tlb flush IPI delivery. We must reload CR3 |
| + * to make sure to use no freed page tables. |
| + * |
| + * As above, load_cr3() is serializing and orders TLB |
| + * fills with respect to the mm_cpumask write. |
| + */ |
| + load_cr3(next->pgd); |
| + load_mm_ldt(next); |
| + } |
| + } |
| +#endif |
| +} |
| + |
| +#ifdef CONFIG_SMP |
| + |
| /* |
| * |
| * The flush IPI assumes that a thread switch happens in this order: |
| |
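| 
| The ordering comment introduced above describes a classic store-buffering
| pattern: each side performs a store followed by a load, and the outcome to
| avoid is both loads missing the other side's store.  As a rough userspace
| analogue (a sketch only, not kernel code; the pte/mask variables and the
| iteration count are purely illustrative), a C11-atomics litmus test of the
| same shape could look like this:
| 
| /*
|  * Store-buffering litmus test, a userspace stand-in for the hazard the
|  * switch_mm() comment describes.  "pte" plays the role of the PTE write
|  * on CPU 0 and "mask" the mm_cpumask bit set on CPU 1.
|  * Build with: cc -O2 -pthread sb_litmus.c
|  */
| #include <pthread.h>
| #include <stdatomic.h>
| #include <stdio.h>
| 
| static atomic_int pte, mask;	/* the two stores */
| static int r_cpu0, r_cpu1;	/* what each side loads */
| 
| static void *cpu0(void *arg)
| {
| 	/* "CPU 0": publish the new PTE, then look at the cpumask bit. */
| 	atomic_store_explicit(&pte, 1, memory_order_relaxed);
| 	/*
| 	 * Without a full barrier between the store above and this load,
| 	 * the load can complete before the store is globally visible --
| 	 * the reordering the switch_mm() comment forbids.  In the kernel
| 	 * the barrier also has to order TLB fills, which is why the
| 	 * serializing load_cr3() is used there.
| 	 */
| 	r_cpu0 = atomic_load_explicit(&mask, memory_order_relaxed);
| 	return NULL;
| }
| 
| static void *cpu1(void *arg)
| {
| 	/* "CPU 1": set the cpumask bit, then read the PTE. */
| 	atomic_store_explicit(&mask, 1, memory_order_relaxed);
| 	r_cpu1 = atomic_load_explicit(&pte, memory_order_relaxed);
| 	return NULL;
| }
| 
| int main(void)
| {
| 	int i;
| 
| 	for (i = 0; i < 100000; i++) {
| 		pthread_t t0, t1;
| 
| 		atomic_store(&pte, 0);
| 		atomic_store(&mask, 0);
| 		pthread_create(&t0, NULL, cpu0, NULL);
| 		pthread_create(&t1, NULL, cpu1, NULL);
| 		pthread_join(t0, NULL);
| 		pthread_join(t1, NULL);
| 		/*
| 		 * r_cpu0 == 0 && r_cpu1 == 0 is the bad outcome: neither
| 		 * side saw the other's store, so no flush IPI would have
| 		 * been sent.
| 		 */
| 		if (r_cpu0 == 0 && r_cpu1 == 0)
| 			printf("store/load reordering seen, iteration %d\n", i);
| 	}
| 	return 0;
| }
| 
| Whether the bad outcome is actually observed depends on the hardware and on
| how often the two threads overlap, but as the switch_mm() comment notes,
| ruling it out requires a full barrier on both sides; the switch_mm() half
| of that contract is what the serializing load_cr3() supplies.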