From foo@baz Tue Mar 24 10:57:46 CET 2015
From: "David S. Miller" <davem@davemloft.net>
Date: Mon, 23 Mar 2015 09:22:10 -0700
Subject: sparc64: Fix several bugs in memmove().

From: "David S. Miller" <davem@davemloft.net>

[ Upstream commit 2077cef4d5c29cf886192ec32066f783d6a80db8 ]
Firstly, handle zero-length calls properly.  Believe it or not, there
are a few of these happening during early boot.
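
In C terms (an illustrative sketch only; "memmove_checked" is a
made-up wrapper name, and the real change is the brz,pn test in the
assembly below), the zero-length guard amounts to:

	#include <stddef.h>
	#include <string.h>

	/* Hypothetical model of the new entry check: return dst
	 * untouched when len == 0 instead of running a copy loop
	 * that assumes at least one byte of work. */
	static void *memmove_checked(void *dst, const void *src, size_t len)
	{
		if (len == 0)			/* brz,pn %o2, 99f */
			return dst;
		return memmove(dst, src, len);	/* normal copy paths */
	}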

Next, we can't just drop to a memcpy() call in the forward-copy case
where dst <= src.  The reason is that the cache-initializing stores
used in the Niagara memcpy() implementations can end up clearing out
a cache line before we have finished loading its original contents.

For example, considering NG4memcpy, the main unrolled loop begins like
this:

	load   src + 0x00
	load   src + 0x08
	load   src + 0x10
	load   src + 0x18
	load   src + 0x20
	store  dst + 0x00

Assume dst is 64-byte aligned, and say dst is src - 8 for this
memcpy() call.  The store at the end there is the first store to that
cache line; being a cache-initializing store, it clears the whole
64-byte line, which clobbers "src + 0x28" before that data has even
been loaded.
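
To make the clobber concrete, here is a toy C model of a
cache-initializing store (purely illustrative; the memset() stand-in
and the names are assumptions, not how the hardware or the kernel is
written):

	#include <stdint.h>
	#include <string.h>

	#define LINE 64

	/* Toy model: a cache-initializing store conceptually zeroes the
	 * enclosing 64-byte line before depositing the 8 stored bytes. */
	static void init_store8(uint8_t *base, size_t off, uint64_t val)
	{
		memset(base + (off & ~(size_t)(LINE - 1)), 0, LINE);
		memcpy(base + off, &val, sizeof(val));
	}

Here the line zeroed by the store to dst + 0x00 spans src - 8 through
src + 0x37, taking out the not-yet-loaded bytes at src + 0x28 and
beyond.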

To avoid this, just fall through to a simple copy that is only mildly
optimized for the case where src and dst are both 8-byte aligned and
the length is a multiple of 8 as well.  We could get fancy and call
GENmemcpy(), but this is good enough for how this routine is actually
used.
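
In C, the fallback the patch adds (labels 2, 3 and 4 in the assembly
below) looks roughly like the following sketch.  This is illustrative
only; "forward_copy" is a made-up name, not the kernel's actual code:

	#include <stddef.h>
	#include <stdint.h>

	/* Sketch of the patched forward-copy fallback: plain loads and
	 * stores with no cache-initializing behavior, so an overlapping
	 * forward copy can never clobber source bytes it has not yet
	 * read. */
	static void *forward_copy(void *dstp, const void *srcp, size_t len)
	{
		uint8_t *dst = dstp;
		const uint8_t *src = srcp;

		/* Label 2: test dst, src and len for 8-byte alignment
		 * in one shot, as andcc does with the or'd values. */
		if ((((uintptr_t)dst | (uintptr_t)src | len) & 0x7) == 0) {
			for (; len; len -= 8, src += 8, dst += 8)  /* label 3 */
				*(uint64_t *)dst = *(const uint64_t *)src;
		} else {
			for (; len; len--, src++, dst++)           /* label 4 */
				*dst = *src;
		}
		return dstp;
	}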

Reported-by: David Ahern <david.ahern@oracle.com>
Reported-by: Bob Picco <bpicco@meloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 arch/sparc/lib/memmove.S |   35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

--- a/arch/sparc/lib/memmove.S
+++ b/arch/sparc/lib/memmove.S
@@ -8,9 +8,11 @@
 
 	.text
 ENTRY(memmove) /* o0=dst o1=src o2=len */
-	mov		%o0, %g1
+	brz,pn		%o2, 99f
+	 mov		%o0, %g1
+
 	cmp		%o0, %o1
-	bleu,pt		%xcc, memcpy
+	bleu,pt		%xcc, 2f
 	 add		%o1, %o2, %g7
 	cmp		%g7, %o0
 	bleu,pt		%xcc, memcpy
@@ -24,7 +26,34 @@ ENTRY(memmove) /* o0=dst o1=src o2=len *
 	 stb		%g7, [%o0]
 	bne,pt		%icc, 1b
 	 sub		%o0, 1, %o0
-
+99:
 	retl
 	 mov		%g1, %o0
+
+	/* We can't just call memcpy for these memmove cases.  On some
+	 * chips the memcpy uses cache initializing stores and when dst
+	 * and src are close enough, those can clobber the source data
+	 * before we've loaded it in.
+	 */
+2:	or		%o0, %o1, %g7
+	or		%o2, %g7, %g7
+	andcc		%g7, 0x7, %g0
+	bne,pn		%xcc, 4f
+	 nop
+
+3:	ldx		[%o1], %g7
+	add		%o1, 8, %o1
+	subcc		%o2, 8, %o2
+	add		%o0, 8, %o0
+	bne,pt		%icc, 3b
+	 stx		%g7, [%o0 - 0x8]
+	ba,a,pt		%xcc, 99b
+
+4:	ldub		[%o1], %g7
+	add		%o1, 1, %o1
+	subcc		%o2, 1, %o2
+	add		%o0, 1, %o0
+	bne,pt		%icc, 4b
+	 stb		%g7, [%o0 - 0x1]
+	ba,a,pt		%xcc, 99b
 ENDPROC(memmove)