Merge branch 'sparc64-context-wrap-fixes'

Pavel Tatashin says:

====================
sparc64: context wrap fixes

This patch series contains fixes for context wrap: when we are out of
context ids, and need to get a new version.

It fixes memory corruption issues which happen when more than number of
context ids (currently set to 8K) number of processes are started
simultaneously, and processes can get a wrong context.

sparc64: new context wrap:
- contains explanation of new wrap method, and also explanation of races
  that it solves
sparc64: reset mm cpumask after wrap
- explains issue of not reseting cpu mask on a wrap
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
diff --git a/arch/sparc/include/asm/mmu_64.h b/arch/sparc/include/asm/mmu_64.h
index f7de0db..83b36a5 100644
--- a/arch/sparc/include/asm/mmu_64.h
+++ b/arch/sparc/include/asm/mmu_64.h
@@ -52,7 +52,7 @@
 #define CTX_NR_MASK		TAG_CONTEXT_BITS
 #define CTX_HW_MASK		(CTX_NR_MASK | CTX_PGSZ_MASK)
 
-#define CTX_FIRST_VERSION	((_AC(1,UL) << CTX_VERSION_SHIFT) + _AC(1,UL))
+#define CTX_FIRST_VERSION	BIT(CTX_VERSION_SHIFT)
 #define CTX_VALID(__ctx)	\
 	 (!(((__ctx.sparc64_ctx_val) ^ tlb_context_cache) & CTX_VERSION_MASK))
 #define CTX_HWBITS(__ctx)	((__ctx.sparc64_ctx_val) & CTX_HW_MASK)
diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h
index 22fede6..2cddcda 100644
--- a/arch/sparc/include/asm/mmu_context_64.h
+++ b/arch/sparc/include/asm/mmu_context_64.h
@@ -19,13 +19,8 @@
 extern unsigned long tlb_context_cache;
 extern unsigned long mmu_context_bmap[];
 
+DECLARE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm);
 void get_new_mmu_context(struct mm_struct *mm);
-#ifdef CONFIG_SMP
-void smp_new_mmu_context_version(void);
-#else
-#define smp_new_mmu_context_version() do { } while (0)
-#endif
-
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context(struct mm_struct *mm);
 
@@ -76,8 +71,9 @@
 static inline void switch_mm(struct mm_struct *old_mm, struct mm_struct *mm, struct task_struct *tsk)
 {
 	unsigned long ctx_valid, flags;
-	int cpu;
+	int cpu = smp_processor_id();
 
+	per_cpu(per_cpu_secondary_mm, cpu) = mm;
 	if (unlikely(mm == &init_mm))
 		return;
 
@@ -123,7 +119,6 @@
 	 * for the first time, we must flush that context out of the
 	 * local TLB.
 	 */
-	cpu = smp_processor_id();
 	if (!ctx_valid || !cpumask_test_cpu(cpu, mm_cpumask(mm))) {
 		cpumask_set_cpu(cpu, mm_cpumask(mm));
 		__flush_tlb_mm(CTX_HWBITS(mm->context),
@@ -133,26 +128,7 @@
 }
 
 #define deactivate_mm(tsk,mm)	do { } while (0)
-
-/* Activate a new MM instance for the current task. */
-static inline void activate_mm(struct mm_struct *active_mm, struct mm_struct *mm)
-{
-	unsigned long flags;
-	int cpu;
-
-	spin_lock_irqsave(&mm->context.lock, flags);
-	if (!CTX_VALID(mm->context))
-		get_new_mmu_context(mm);
-	cpu = smp_processor_id();
-	if (!cpumask_test_cpu(cpu, mm_cpumask(mm)))
-		cpumask_set_cpu(cpu, mm_cpumask(mm));
-
-	load_secondary_context(mm);
-	__flush_tlb_mm(CTX_HWBITS(mm->context), SECONDARY_CONTEXT);
-	tsb_context_switch(mm);
-	spin_unlock_irqrestore(&mm->context.lock, flags);
-}
-
+#define activate_mm(active_mm, mm) switch_mm(active_mm, mm, NULL)
 #endif /* !(__ASSEMBLY__) */
 
 #endif /* !(__SPARC64_MMU_CONTEXT_H) */
diff --git a/arch/sparc/include/asm/pil.h b/arch/sparc/include/asm/pil.h
index 26693703..522b43d 100644
--- a/arch/sparc/include/asm/pil.h
+++ b/arch/sparc/include/asm/pil.h
@@ -20,7 +20,6 @@
 #define PIL_SMP_CALL_FUNC	1
 #define PIL_SMP_RECEIVE_SIGNAL	2
 #define PIL_SMP_CAPTURE		3
-#define PIL_SMP_CTX_NEW_VERSION	4
 #define PIL_DEVICE_IRQ		5
 #define PIL_SMP_CALL_FUNC_SNGL	6
 #define PIL_DEFERRED_PCR_WORK	7
diff --git a/arch/sparc/kernel/kernel.h b/arch/sparc/kernel/kernel.h
index c9804551..6ae1e77 100644
--- a/arch/sparc/kernel/kernel.h
+++ b/arch/sparc/kernel/kernel.h
@@ -37,7 +37,6 @@
 /* smp_64.c */
 void __irq_entry smp_call_function_client(int irq, struct pt_regs *regs);
 void __irq_entry smp_call_function_single_client(int irq, struct pt_regs *regs);
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs);
 void __irq_entry smp_penguin_jailcell(int irq, struct pt_regs *regs);
 void __irq_entry smp_receive_signal_client(int irq, struct pt_regs *regs);
 
diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c
index b3bc0ac..fdf3104 100644
--- a/arch/sparc/kernel/smp_64.c
+++ b/arch/sparc/kernel/smp_64.c
@@ -964,37 +964,6 @@
 	preempt_enable();
 }
 
-void __irq_entry smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
-{
-	struct mm_struct *mm;
-	unsigned long flags;
-
-	clear_softint(1 << irq);
-
-	/* See if we need to allocate a new TLB context because
-	 * the version of the one we are using is now out of date.
-	 */
-	mm = current->active_mm;
-	if (unlikely(!mm || (mm == &init_mm)))
-		return;
-
-	spin_lock_irqsave(&mm->context.lock, flags);
-
-	if (unlikely(!CTX_VALID(mm->context)))
-		get_new_mmu_context(mm);
-
-	spin_unlock_irqrestore(&mm->context.lock, flags);
-
-	load_secondary_context(mm);
-	__flush_tlb_mm(CTX_HWBITS(mm->context),
-		       SECONDARY_CONTEXT);
-}
-
-void smp_new_mmu_context_version(void)
-{
-	smp_cross_call(&xcall_new_mmu_context_version, 0, 0, 0);
-}
-
 #ifdef CONFIG_KGDB
 void kgdb_roundup_cpus(unsigned long flags)
 {
diff --git a/arch/sparc/kernel/ttable_64.S b/arch/sparc/kernel/ttable_64.S
index 7bd8f65..efe93ab 100644
--- a/arch/sparc/kernel/ttable_64.S
+++ b/arch/sparc/kernel/ttable_64.S
@@ -50,7 +50,7 @@
 tl0_irq1:	TRAP_IRQ(smp_call_function_client, 1)
 tl0_irq2:	TRAP_IRQ(smp_receive_signal_client, 2)
 tl0_irq3:	TRAP_IRQ(smp_penguin_jailcell, 3)
-tl0_irq4:	TRAP_IRQ(smp_new_mmu_context_version_client, 4)
+tl0_irq4:       BTRAP(0x44)
 #else
 tl0_irq1:	BTRAP(0x41)
 tl0_irq2:	BTRAP(0x42)
diff --git a/arch/sparc/mm/init_64.c b/arch/sparc/mm/init_64.c
index 7ca1b9d..3c40ebd 100644
--- a/arch/sparc/mm/init_64.c
+++ b/arch/sparc/mm/init_64.c
@@ -707,10 +707,58 @@
 
 /* get_new_mmu_context() uses "cache + 1".  */
 DEFINE_SPINLOCK(ctx_alloc_lock);
-unsigned long tlb_context_cache = CTX_FIRST_VERSION - 1;
+unsigned long tlb_context_cache = CTX_FIRST_VERSION;
 #define MAX_CTX_NR	(1UL << CTX_NR_BITS)
 #define CTX_BMAP_SLOTS	BITS_TO_LONGS(MAX_CTX_NR)
 DECLARE_BITMAP(mmu_context_bmap, MAX_CTX_NR);
+DEFINE_PER_CPU(struct mm_struct *, per_cpu_secondary_mm) = {0};
+
+static void mmu_context_wrap(void)
+{
+	unsigned long old_ver = tlb_context_cache & CTX_VERSION_MASK;
+	unsigned long new_ver, new_ctx, old_ctx;
+	struct mm_struct *mm;
+	int cpu;
+
+	bitmap_zero(mmu_context_bmap, 1 << CTX_NR_BITS);
+
+	/* Reserve kernel context */
+	set_bit(0, mmu_context_bmap);
+
+	new_ver = (tlb_context_cache & CTX_VERSION_MASK) + CTX_FIRST_VERSION;
+	if (unlikely(new_ver == 0))
+		new_ver = CTX_FIRST_VERSION;
+	tlb_context_cache = new_ver;
+
+	/*
+	 * Make sure that any new mm that are added into per_cpu_secondary_mm,
+	 * are going to go through get_new_mmu_context() path.
+	 */
+	mb();
+
+	/*
+	 * Updated versions to current on those CPUs that had valid secondary
+	 * contexts
+	 */
+	for_each_online_cpu(cpu) {
+		/*
+		 * If a new mm is stored after we took this mm from the array,
+		 * it will go into get_new_mmu_context() path, because we
+		 * already bumped the version in tlb_context_cache.
+		 */
+		mm = per_cpu(per_cpu_secondary_mm, cpu);
+
+		if (unlikely(!mm || mm == &init_mm))
+			continue;
+
+		old_ctx = mm->context.sparc64_ctx_val;
+		if (likely((old_ctx & CTX_VERSION_MASK) == old_ver)) {
+			new_ctx = (old_ctx & ~CTX_VERSION_MASK) | new_ver;
+			set_bit(new_ctx & CTX_NR_MASK, mmu_context_bmap);
+			mm->context.sparc64_ctx_val = new_ctx;
+		}
+	}
+}
 
 /* Caller does TLB context flushing on local CPU if necessary.
  * The caller also ensures that CTX_VALID(mm->context) is false.
@@ -726,48 +774,30 @@
 {
 	unsigned long ctx, new_ctx;
 	unsigned long orig_pgsz_bits;
-	int new_version;
 
 	spin_lock(&ctx_alloc_lock);
+retry:
+	/* wrap might have happened, test again if our context became valid */
+	if (unlikely(CTX_VALID(mm->context)))
+		goto out;
 	orig_pgsz_bits = (mm->context.sparc64_ctx_val & CTX_PGSZ_MASK);
 	ctx = (tlb_context_cache + 1) & CTX_NR_MASK;
 	new_ctx = find_next_zero_bit(mmu_context_bmap, 1 << CTX_NR_BITS, ctx);
-	new_version = 0;
 	if (new_ctx >= (1 << CTX_NR_BITS)) {
 		new_ctx = find_next_zero_bit(mmu_context_bmap, ctx, 1);
 		if (new_ctx >= ctx) {
-			int i;
-			new_ctx = (tlb_context_cache & CTX_VERSION_MASK) +
-				CTX_FIRST_VERSION;
-			if (new_ctx == 1)
-				new_ctx = CTX_FIRST_VERSION;
-
-			/* Don't call memset, for 16 entries that's just
-			 * plain silly...
-			 */
-			mmu_context_bmap[0] = 3;
-			mmu_context_bmap[1] = 0;
-			mmu_context_bmap[2] = 0;
-			mmu_context_bmap[3] = 0;
-			for (i = 4; i < CTX_BMAP_SLOTS; i += 4) {
-				mmu_context_bmap[i + 0] = 0;
-				mmu_context_bmap[i + 1] = 0;
-				mmu_context_bmap[i + 2] = 0;
-				mmu_context_bmap[i + 3] = 0;
-			}
-			new_version = 1;
-			goto out;
+			mmu_context_wrap();
+			goto retry;
 		}
 	}
+	if (mm->context.sparc64_ctx_val)
+		cpumask_clear(mm_cpumask(mm));
 	mmu_context_bmap[new_ctx>>6] |= (1UL << (new_ctx & 63));
 	new_ctx |= (tlb_context_cache & CTX_VERSION_MASK);
-out:
 	tlb_context_cache = new_ctx;
 	mm->context.sparc64_ctx_val = new_ctx | orig_pgsz_bits;
+out:
 	spin_unlock(&ctx_alloc_lock);
-
-	if (unlikely(new_version))
-		smp_new_mmu_context_version();
 }
 
 static int numa_enabled = 1;
diff --git a/arch/sparc/mm/ultra.S b/arch/sparc/mm/ultra.S
index 5d2fd6c..fcf4d27 100644
--- a/arch/sparc/mm/ultra.S
+++ b/arch/sparc/mm/ultra.S
@@ -971,11 +971,6 @@
 	wr		%g0, (1 << PIL_SMP_CAPTURE), %set_softint
 	retry
 
-	.globl		xcall_new_mmu_context_version
-xcall_new_mmu_context_version:
-	wr		%g0, (1 << PIL_SMP_CTX_NEW_VERSION), %set_softint
-	retry
-
 #ifdef CONFIG_KGDB
 	.globl		xcall_kgdb_capture
 xcall_kgdb_capture: