Merge branch 'parisc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux

Pull parisc fixes from Helge Deller:
 "We have one important patch from Dave Anglin and myself which fixes
  PTE/TLB race conditions which caused random segmentation faults on our
  debian buildd servers, and one patch from Alex Ivanov which speeds up
  the graphical text console on the STI framebuffer driver"

* 'parisc-4.2-1' of git://git.kernel.org/pub/scm/linux/kernel/git/deller/parisc-linux:
  parisc: Fix some PTE/TLB race conditions and optimize __flush_tlb_range based on timing results
  stifb: Implement hardware accelerated copyarea
diff --git a/arch/parisc/include/asm/pgtable.h b/arch/parisc/include/asm/pgtable.h
index 0a18375..f93c4a4 100644
--- a/arch/parisc/include/asm/pgtable.h
+++ b/arch/parisc/include/asm/pgtable.h
@@ -16,7 +16,7 @@
 #include <asm/processor.h>
 #include <asm/cache.h>
 
-extern spinlock_t pa_dbit_lock;
+extern spinlock_t pa_tlb_lock;
 
 /*
  * kern_addr_valid(ADDR) tests if ADDR is pointing to valid kernel
@@ -33,6 +33,19 @@
  */
 #define kern_addr_valid(addr)	(1)
 
+/* Purge data and instruction TLB entries.  Must be called holding
+ * the pa_tlb_lock.  The TLB purge instructions are slow on SMP
+ * machines since the purge must be broadcast to all CPUs.
+ */
+
+static inline void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+{
+	mtsp(mm->context, 1);
+	pdtlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
+}
+
 /* Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
@@ -42,15 +55,20 @@
                 *(pteptr) = (pteval);                           \
         } while(0)
 
-extern void purge_tlb_entries(struct mm_struct *, unsigned long);
+#define pte_inserted(x)						\
+	((pte_val(x) & (_PAGE_PRESENT|_PAGE_ACCESSED))		\
+	 == (_PAGE_PRESENT|_PAGE_ACCESSED))
 
-#define set_pte_at(mm, addr, ptep, pteval)                      \
-	do {                                                    \
+#define set_pte_at(mm, addr, ptep, pteval)			\
+	do {							\
+		pte_t old_pte;					\
 		unsigned long flags;				\
-		spin_lock_irqsave(&pa_dbit_lock, flags);	\
-		set_pte(ptep, pteval);                          \
-		purge_tlb_entries(mm, addr);                    \
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);	\
+		spin_lock_irqsave(&pa_tlb_lock, flags);		\
+		old_pte = *ptep;				\
+		set_pte(ptep, pteval);				\
+		if (pte_inserted(old_pte))			\
+			purge_tlb_entries(mm, addr);		\
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);	\
 	} while (0)
 
 #endif /* !__ASSEMBLY__ */
@@ -268,7 +286,7 @@
 
 #define pte_none(x)     (pte_val(x) == 0)
 #define pte_present(x)	(pte_val(x) & _PAGE_PRESENT)
-#define pte_clear(mm,addr,xp)	do { pte_val(*(xp)) = 0; } while (0)
+#define pte_clear(mm, addr, xp)  set_pte_at(mm, addr, xp, __pte(0))
 
 #define pmd_flag(x)	(pmd_val(x) & PxD_FLAG_MASK)
 #define pmd_address(x)	((unsigned long)(pmd_val(x) &~ PxD_FLAG_MASK) << PxD_VALUE_SHIFT)
@@ -435,15 +453,15 @@
 	if (!pte_young(*ptep))
 		return 0;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	pte = *ptep;
 	if (!pte_young(pte)) {
-		spin_unlock_irqrestore(&pa_dbit_lock, flags);
+		spin_unlock_irqrestore(&pa_tlb_lock, flags);
 		return 0;
 	}
 	set_pte(ptep, pte_mkold(pte));
 	purge_tlb_entries(vma->vm_mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 	return 1;
 }
 
@@ -453,11 +471,12 @@
 	pte_t old_pte;
 	unsigned long flags;
 
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	old_pte = *ptep;
-	pte_clear(mm,addr,ptep);
-	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	set_pte(ptep, __pte(0));
+	if (pte_inserted(old_pte))
+		purge_tlb_entries(mm, addr);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 
 	return old_pte;
 }
@@ -465,10 +484,10 @@
 static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
 {
 	unsigned long flags;
-	spin_lock_irqsave(&pa_dbit_lock, flags);
+	spin_lock_irqsave(&pa_tlb_lock, flags);
 	set_pte(ptep, pte_wrprotect(*ptep));
 	purge_tlb_entries(mm, addr);
-	spin_unlock_irqrestore(&pa_dbit_lock, flags);
+	spin_unlock_irqrestore(&pa_tlb_lock, flags);
 }
 
 #define pte_same(A,B)	(pte_val(A) == pte_val(B))
diff --git a/arch/parisc/include/asm/tlbflush.h b/arch/parisc/include/asm/tlbflush.h
index 9d086a5..e84b964 100644
--- a/arch/parisc/include/asm/tlbflush.h
+++ b/arch/parisc/include/asm/tlbflush.h
@@ -13,6 +13,9 @@
  * active at any one time on the Merced bus.  This tlb purge
  * synchronisation is fairly lightweight and harmless so we activate
  * it on all systems not just the N class.
+
+ * It is also used to ensure PTE updates are atomic and consistent
+ * with the TLB.
  */
 extern spinlock_t pa_tlb_lock;
 
@@ -24,20 +27,24 @@
 
 #define smp_flush_tlb_all()	flush_tlb_all()
 
+int __flush_tlb_range(unsigned long sid,
+	unsigned long start, unsigned long end);
+
+#define flush_tlb_range(vma, start, end) \
+	__flush_tlb_range((vma)->vm_mm->context, start, end)
+
+#define flush_tlb_kernel_range(start, end) \
+	__flush_tlb_range(0, start, end)
+
 /*
  * flush_tlb_mm()
  *
- * XXX This code is NOT valid for HP-UX compatibility processes,
- * (although it will probably work 99% of the time). HP-UX
- * processes are free to play with the space id's and save them
- * over long periods of time, etc. so we have to preserve the
- * space and just flush the entire tlb. We need to check the
- * personality in order to do that, but the personality is not
- * currently being set correctly.
- *
- * Of course, Linux processes could do the same thing, but
- * we don't support that (and the compilers, dynamic linker,
- * etc. do not do that).
+ * The code to switch to a new context is NOT valid for processes
+ * which play with the space id's.  Thus, we have to preserve the
+ * space and just flush the entire tlb.  However, the compilers,
+ * dynamic linker, etc, do not manipulate space id's, so there
+ * could be a significant performance benefit in switching contexts
+ * and not flushing the whole tlb.
  */
 
 static inline void flush_tlb_mm(struct mm_struct *mm)
@@ -45,10 +52,18 @@
 	BUG_ON(mm == &init_mm); /* Should never happen */
 
 #if 1 || defined(CONFIG_SMP)
+	/* Except for very small threads, flushing the whole TLB is
+	 * faster than using __flush_tlb_range.  The pdtlb and pitlb
+	 * instructions are very slow because of the TLB broadcast.
+	 * It might be faster to do local range flushes on all CPUs
+	 * on PA 2.0 systems.
+	 */
 	flush_tlb_all();
 #else
 	/* FIXME: currently broken, causing space id and protection ids
-	 *  to go out of sync, resulting in faults on userspace accesses.
+	 * to go out of sync, resulting in faults on userspace accesses.
+	 * This approach needs further investigation since running many
+	 * small applications (e.g., GCC testsuite) is faster on HP-UX.
 	 */
 	if (mm) {
 		if (mm->context != 0)
@@ -65,22 +80,12 @@
 {
 	unsigned long flags, sid;
 
-	/* For one page, it's not worth testing the split_tlb variable */
-
-	mb();
 	sid = vma->vm_mm->context;
 	purge_tlb_start(flags);
 	mtsp(sid, 1);
 	pdtlb(addr);
-	pitlb(addr);
+	if (unlikely(split_tlb))
+		pitlb(addr);
 	purge_tlb_end(flags);
 }
-
-void __flush_tlb_range(unsigned long sid,
-	unsigned long start, unsigned long end);
-
-#define flush_tlb_range(vma,start,end) __flush_tlb_range((vma)->vm_mm->context,start,end)
-
-#define flush_tlb_kernel_range(start, end) __flush_tlb_range(0,start,end)
-
 #endif
diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c
index f6448c7..cda6dbb 100644
--- a/arch/parisc/kernel/cache.c
+++ b/arch/parisc/kernel/cache.c
@@ -342,12 +342,15 @@
 EXPORT_SYMBOL(flush_kernel_icache_range_asm);
 
 #define FLUSH_THRESHOLD 0x80000 /* 0.5MB */
-int parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+static unsigned long parisc_cache_flush_threshold __read_mostly = FLUSH_THRESHOLD;
+
+#define FLUSH_TLB_THRESHOLD (2*1024*1024) /* 2MB initial TLB threshold */
+static unsigned long parisc_tlb_flush_threshold __read_mostly = FLUSH_TLB_THRESHOLD;
 
 void __init parisc_setup_cache_timing(void)
 {
 	unsigned long rangetime, alltime;
-	unsigned long size;
+	unsigned long size, start;
 
 	alltime = mfctl(16);
 	flush_data_cache();
@@ -364,14 +367,43 @@
 	/* Racy, but if we see an intermediate value, it's ok too... */
 	parisc_cache_flush_threshold = size * alltime / rangetime;
 
-	parisc_cache_flush_threshold = (parisc_cache_flush_threshold + L1_CACHE_BYTES - 1) &~ (L1_CACHE_BYTES - 1); 
+	parisc_cache_flush_threshold = L1_CACHE_ALIGN(parisc_cache_flush_threshold);
 	if (!parisc_cache_flush_threshold)
 		parisc_cache_flush_threshold = FLUSH_THRESHOLD;
 
 	if (parisc_cache_flush_threshold > cache_info.dc_size)
 		parisc_cache_flush_threshold = cache_info.dc_size;
 
-	printk(KERN_INFO "Setting cache flush threshold to %x (%d CPUs online)\n", parisc_cache_flush_threshold, num_online_cpus());
+	printk(KERN_INFO "Setting cache flush threshold to %lu kB\n",
+		parisc_cache_flush_threshold/1024);
+
+	/* calculate TLB flush threshold */
+
+	alltime = mfctl(16);
+	flush_tlb_all();
+	alltime = mfctl(16) - alltime;
+
+	size = PAGE_SIZE;
+	start = (unsigned long) _text;
+	rangetime = mfctl(16);
+	while (start < (unsigned long) _end) {
+		flush_tlb_kernel_range(start, start + PAGE_SIZE);
+		start += PAGE_SIZE;
+		size += PAGE_SIZE;
+	}
+	rangetime = mfctl(16) - rangetime;
+
+	printk(KERN_DEBUG "Whole TLB flush %lu cycles, flushing %lu bytes %lu cycles\n",
+		alltime, size, rangetime);
+
+	parisc_tlb_flush_threshold = size * alltime / rangetime;
+	parisc_tlb_flush_threshold *= num_online_cpus();
+	parisc_tlb_flush_threshold = PAGE_ALIGN(parisc_tlb_flush_threshold);
+	if (!parisc_tlb_flush_threshold)
+		parisc_tlb_flush_threshold = FLUSH_TLB_THRESHOLD;
+
+	printk(KERN_INFO "Setting TLB flush threshold to %lu kB\n",
+		parisc_tlb_flush_threshold/1024);
 }
 
 extern void purge_kernel_dcache_page_asm(unsigned long);
@@ -403,48 +435,45 @@
 }
 EXPORT_SYMBOL(copy_user_page);
 
-void purge_tlb_entries(struct mm_struct *mm, unsigned long addr)
+/* __flush_tlb_range()
+ *
+ * returns 1 if all TLBs were flushed.
+ */
+int __flush_tlb_range(unsigned long sid, unsigned long start,
+		      unsigned long end)
 {
-	unsigned long flags;
+	unsigned long flags, size;
 
-	/* Note: purge_tlb_entries can be called at startup with
-	   no context.  */
-
-	purge_tlb_start(flags);
-	mtsp(mm->context, 1);
-	pdtlb(addr);
-	pitlb(addr);
-	purge_tlb_end(flags);
-}
-EXPORT_SYMBOL(purge_tlb_entries);
-
-void __flush_tlb_range(unsigned long sid, unsigned long start,
-		       unsigned long end)
-{
-	unsigned long npages;
-
-	npages = ((end - (start & PAGE_MASK)) + (PAGE_SIZE - 1)) >> PAGE_SHIFT;
-	if (npages >= 512)  /* 2MB of space: arbitrary, should be tuned */
+	size = (end - start);
+	if (size >= parisc_tlb_flush_threshold) {
 		flush_tlb_all();
-	else {
-		unsigned long flags;
+		return 1;
+	}
 
+	/* Purge TLB entries for small ranges using the pdtlb and
+	   pitlb instructions.  These instructions execute locally
+	   but cause a purge request to be broadcast to other TLBs.  */
+	if (likely(!split_tlb)) {
+		while (start < end) {
+			purge_tlb_start(flags);
+			mtsp(sid, 1);
+			pdtlb(start);
+			purge_tlb_end(flags);
+			start += PAGE_SIZE;
+		}
+		return 0;
+	}
+
+	/* split TLB case */
+	while (start < end) {
 		purge_tlb_start(flags);
 		mtsp(sid, 1);
-		if (split_tlb) {
-			while (npages--) {
-				pdtlb(start);
-				pitlb(start);
-				start += PAGE_SIZE;
-			}
-		} else {
-			while (npages--) {
-				pdtlb(start);
-				start += PAGE_SIZE;
-			}
-		}
+		pdtlb(start);
+		pitlb(start);
 		purge_tlb_end(flags);
+		start += PAGE_SIZE;
 	}
+	return 0;
 }
 
 static void cacheflush_h_tmp_function(void *dummy)
diff --git a/arch/parisc/kernel/entry.S b/arch/parisc/kernel/entry.S
index 7581961..c5ef408 100644
--- a/arch/parisc/kernel/entry.S
+++ b/arch/parisc/kernel/entry.S
@@ -45,7 +45,7 @@
 	.level 2.0
 #endif
 
-	.import         pa_dbit_lock,data
+	.import		pa_tlb_lock,data
 
 	/* space_to_prot macro creates a prot id from a space id */
 
@@ -420,8 +420,8 @@
 	SHLREG		%r9,PxD_VALUE_SHIFT,\pmd
 	extru		\va,31-PAGE_SHIFT,ASM_BITS_PER_PTE,\index
 	dep		%r0,31,PAGE_SHIFT,\pmd  /* clear offset */
-	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd
-	LDREG		%r0(\pmd),\pte		/* pmd is now pte */
+	shladd		\index,BITS_PER_PTE_ENTRY,\pmd,\pmd /* pmd is now pte */
+	LDREG		%r0(\pmd),\pte
 	bb,>=,n		\pte,_PAGE_PRESENT_BIT,\fault
 	.endm
 
@@ -453,57 +453,53 @@
 	L2_ptep		\pgd,\pte,\index,\va,\fault
 	.endm
 
-	/* Acquire pa_dbit_lock lock. */
-	.macro		dbit_lock	spc,tmp,tmp1
+	/* Acquire pa_tlb_lock lock and recheck page is still present. */
+	.macro		tlb_lock	spc,ptp,pte,tmp,tmp1,fault
 #ifdef CONFIG_SMP
 	cmpib,COND(=),n	0,\spc,2f
-	load32		PA(pa_dbit_lock),\tmp
+	load32		PA(pa_tlb_lock),\tmp
 1:	LDCW		0(\tmp),\tmp1
 	cmpib,COND(=)	0,\tmp1,1b
 	nop
+	LDREG		0(\ptp),\pte
+	bb,<,n		\pte,_PAGE_PRESENT_BIT,2f
+	b		\fault
+	stw		 \spc,0(\tmp)
 2:
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock without reloading lock address. */
-	.macro		dbit_unlock0	spc,tmp
+	/* Release pa_tlb_lock lock without reloading lock address. */
+	.macro		tlb_unlock0	spc,tmp
 #ifdef CONFIG_SMP
 	or,COND(=)	%r0,\spc,%r0
 	stw             \spc,0(\tmp)
 #endif
 	.endm
 
-	/* Release pa_dbit_lock lock. */
-	.macro		dbit_unlock1	spc,tmp
+	/* Release pa_tlb_lock lock. */
+	.macro		tlb_unlock1	spc,tmp
 #ifdef CONFIG_SMP
-	load32		PA(pa_dbit_lock),\tmp
-	dbit_unlock0	\spc,\tmp
+	load32		PA(pa_tlb_lock),\tmp
+	tlb_unlock0	\spc,\tmp
 #endif
 	.endm
 
 	/* Set the _PAGE_ACCESSED bit of the PTE.  Be clever and
 	 * don't needlessly dirty the cache line if it was already set */
-	.macro		update_ptep	spc,ptep,pte,tmp,tmp1
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_accessed	ptp,pte,tmp,tmp1
 	ldi		_PAGE_ACCESSED,\tmp1
 	or		\tmp1,\pte,\tmp
 	and,COND(<>)	\tmp1,\pte,%r0
-	STREG		\tmp,0(\ptep)
+	STREG		\tmp,0(\ptp)
 	.endm
 
 	/* Set the dirty bit (and accessed bit).  No need to be
 	 * clever, this is only used from the dirty fault */
-	.macro		update_dirty	spc,ptep,pte,tmp
-#ifdef CONFIG_SMP
-	or,COND(=)	%r0,\spc,%r0
-	LDREG		0(\ptep),\pte
-#endif
+	.macro		update_dirty	ptp,pte,tmp
 	ldi		_PAGE_ACCESSED|_PAGE_DIRTY,\tmp
 	or		\tmp,\pte,\pte
-	STREG		\pte,0(\ptep)
+	STREG		\pte,0(\ptp)
 	.endm
 
 	/* bitshift difference between a PFN (based on kernel's PAGE_SIZE)
@@ -1148,14 +1144,14 @@
 
 	L3_ptep		ptp,pte,t0,va,dtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1174,14 +1170,14 @@
 
 	L3_ptep		ptp,pte,t0,va,nadtlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1202,20 +1198,20 @@
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1235,21 +1231,20 @@
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	idtlba		pte,(%sr1,va)
 	idtlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1269,16 +1264,16 @@
 
 	L2_ptep		ptp,pte,t0,va,dtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	idtlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1297,16 +1292,16 @@
 
 	L2_ptep		ptp,pte,t0,va,nadtlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,nadtlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock1	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1406,14 +1401,14 @@
 
 	L3_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 	
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1430,14 +1425,14 @@
 
 	L3_ptep		ptp,pte,t0,va,naitlb_check_alias_20w
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20w
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1458,20 +1453,20 @@
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1482,20 +1477,20 @@
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_11
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_11
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
-	mfsp		%sr1,t0  /* Save sr1 so we can use it in tlb inserts */
+	mfsp		%sr1,t1  /* Save sr1 so we can use it in tlb inserts */
 	mtsp		spc,%sr1
 
 	iitlba		pte,(%sr1,va)
 	iitlbp		prot,(%sr1,va)
 
-	mtsp		t0, %sr1	/* Restore sr1 */
-	dbit_unlock1	spc,t0
+	mtsp		t1, %sr1	/* Restore sr1 */
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1516,16 +1511,16 @@
 
 	L2_ptep		ptp,pte,t0,va,itlb_fault
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,itlb_fault
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0	
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1536,16 +1531,16 @@
 
 	L2_ptep		ptp,pte,t0,va,naitlb_check_alias_20
 
-	dbit_lock	spc,t0,t1
-	update_ptep	spc,ptp,pte,t0,t1
+	tlb_lock	spc,ptp,pte,t0,t1,naitlb_check_alias_20
+	update_accessed	ptp,pte,t0,t1
 
 	make_insert_tlb	spc,pte,prot
 
-	f_extend	pte,t0
+	f_extend	pte,t1
 
 	iitlbt          pte,prot
-	dbit_unlock1	spc,t0
 
+	tlb_unlock1	spc,t0
 	rfir
 	nop
 
@@ -1568,14 +1563,14 @@
 
 	L3_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 		
 	idtlbt          pte,prot
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #else
@@ -1588,8 +1583,8 @@
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb_11	spc,pte,prot
 
@@ -1600,8 +1595,8 @@
 	idtlbp		prot,(%sr1,va)
 
 	mtsp            t1, %sr1     /* Restore sr1 */
-	dbit_unlock0	spc,t0
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 
@@ -1612,16 +1607,16 @@
 
 	L2_ptep		ptp,pte,t0,va,dbit_fault
 
-	dbit_lock	spc,t0,t1
-	update_dirty	spc,ptp,pte,t1
+	tlb_lock	spc,ptp,pte,t0,t1,dbit_fault
+	update_dirty	ptp,pte,t1
 
 	make_insert_tlb	spc,pte,prot
 
 	f_extend	pte,t1
 	
-        idtlbt          pte,prot
-	dbit_unlock0	spc,t0
+	idtlbt		pte,prot
 
+	tlb_unlock0	spc,t0
 	rfir
 	nop
 #endif
diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c
index 6548fd1..b99b39f 100644
--- a/arch/parisc/kernel/traps.c
+++ b/arch/parisc/kernel/traps.c
@@ -43,10 +43,6 @@
 
 #include "../math-emu/math-emu.h"	/* for handle_fpe() */
 
-#if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK)
-DEFINE_SPINLOCK(pa_dbit_lock);
-#endif
-
 static void parisc_show_stack(struct task_struct *task, unsigned long *sp,
 	struct pt_regs *regs);
 
diff --git a/drivers/video/fbdev/stifb.c b/drivers/video/fbdev/stifb.c
index 86621fa..735355b 100644
--- a/drivers/video/fbdev/stifb.c
+++ b/drivers/video/fbdev/stifb.c
@@ -121,6 +121,7 @@
 #define REG_3		0x0004a0
 #define REG_4		0x000600
 #define REG_6		0x000800
+#define REG_7		0x000804
 #define REG_8		0x000820
 #define REG_9		0x000a04
 #define REG_10		0x018000
@@ -135,6 +136,8 @@
 #define REG_21		0x200218
 #define REG_22		0x0005a0
 #define REG_23		0x0005c0
+#define REG_24		0x000808
+#define REG_25		0x000b00
 #define REG_26		0x200118
 #define REG_27		0x200308
 #define REG_32		0x21003c
@@ -429,6 +432,9 @@
 #define SET_LENXY_START_RECFILL(fb, lenxy) \
 	WRITE_WORD(lenxy, fb, REG_9)
 
+#define SETUP_COPYAREA(fb) \
+	WRITE_BYTE(0, fb, REG_16b1)
+
 static void
 HYPER_ENABLE_DISABLE_DISPLAY(struct stifb_info *fb, int enable)
 {
@@ -1004,6 +1010,36 @@
 	return 0;
 }
 
+static void
+stifb_copyarea(struct fb_info *info, const struct fb_copyarea *area)
+{
+	struct stifb_info *fb = container_of(info, struct stifb_info, info);
+
+	SETUP_COPYAREA(fb);
+
+	SETUP_HW(fb);
+	if (fb->info.var.bits_per_pixel == 32) {
+		WRITE_WORD(0xBBA0A000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xffffffff);
+	} else {
+		WRITE_WORD(fb->id == S9000_ID_HCRX ? 0x13a02000 : 0x13a01000, fb, REG_10);
+
+		NGLE_REALLY_SET_IMAGE_PLANEMASK(fb, 0xff);
+	}
+
+	NGLE_QUICK_SET_IMAGE_BITMAP_OP(fb,
+		IBOvals(RopSrc, MaskAddrOffset(0),
+		BitmapExtent08, StaticReg(1),
+		DataDynamic, MaskOtc, BGx(0), FGx(0)));
+
+	WRITE_WORD(((area->sx << 16) | area->sy), fb, REG_24);
+	WRITE_WORD(((area->width << 16) | area->height), fb, REG_7);
+	WRITE_WORD(((area->dx << 16) | area->dy), fb, REG_25);
+
+	SETUP_FB(fb);
+}
+
 static void __init
 stifb_init_display(struct stifb_info *fb)
 {
@@ -1069,7 +1105,7 @@
 	.fb_setcolreg	= stifb_setcolreg,
 	.fb_blank	= stifb_blank,
 	.fb_fillrect	= cfb_fillrect,
-	.fb_copyarea	= cfb_copyarea,
+	.fb_copyarea	= stifb_copyarea,
 	.fb_imageblit	= cfb_imageblit,
 };
 
@@ -1258,7 +1294,7 @@
 	info->fbops = &stifb_ops;
 	info->screen_base = ioremap_nocache(REGION_BASE(fb,1), fix->smem_len);
 	info->screen_size = fix->smem_len;
-	info->flags = FBINFO_DEFAULT;
+	info->flags = FBINFO_DEFAULT | FBINFO_HWACCEL_COPYAREA;
 	info->pseudo_palette = &fb->pseudo_palette;
 
 	/* This has to be done !!! */