|  | /* memcpy.S: optimised assembly memcpy | 
|  | * | 
|  | * Copyright (C) 2003 Red Hat, Inc. All Rights Reserved. | 
|  | * Written by David Howells (dhowells@redhat.com) | 
|  | * | 
|  | * This program is free software; you can redistribute it and/or | 
|  | * modify it under the terms of the GNU General Public License | 
|  | * as published by the Free Software Foundation; either version | 
|  | * 2 of the License, or (at your option) any later version. | 
|  | */ | 
|  |  | 
|  |  | 
|  | # Code goes in the text section; .p2align 4 aligns what follows (the memcpy | 
|  | # entry point) to a 2^4 = 16 byte boundary. | 
|  | .text | 
|  | .p2align	4 | 
|  |  | 
|  | ############################################################################### | 
|  | # | 
|  | # void *memcpy(void *to, const char *from, size_t count) | 
|  | # | 
|  | # Copy count bytes from `from` to `to`, using the widest access size that the | 
|  | # combined alignment of to, from and count allows (16, 8, 4, 2 or 1 bytes per | 
|  | # loop iteration). | 
|  | # | 
|  | # Register usage, as read from the code below: | 
|  | #   GR8      - to; never written here, so it still holds `to` at return | 
|  | #   GR9      - from; becomes the running load pointer (modified) | 
|  | #   GR10     - count; counted down to zero (modified) | 
|  | #   GR11     - step between successive accesses: 1, 2, 4 or 8 bytes | 
|  | #   GR3      - running store pointer, kept one step behind the next store | 
|  | #   GR4-GR7  - scratch: alignment bits on entry, then the data being copied | 
|  | #              (double-word accesses use a register pair, GR4/GR5 or GR6/GR7) | 
|  | #   ICC0, ICC1, ICC3 - condition codes for the alignment and loop tests | 
|  | # | 
|  | # An instruction carrying the .p suffix is packed with the instruction that | 
|  | # follows it, so the order of instructions within each pair matters. | 
|  | # NOTE(review): the #0 / #2 operand on the conditional branches looks like the | 
|  | # branch hint field (#2 on the loop back-edges) - confirm against the FR-V | 
|  | # instruction set manual. | 
|  | # | 
|  | # - NOTE: must not use any stack. exception detection performs function return | 
|  | #         to caller's fixup routine, aborting the remainder of the copy | 
|  | # | 
|  | ############################################################################### | 
|  | .globl		memcpy,__memcpy_end | 
|  | .type		memcpy,@function | 
|  | memcpy: | 
|  | # GR4 = to OR from OR count: a low bit is clear in GR4 only if it is clear in | 
|  | # all three values.  ICC3 is set from count alone (result discarded into GR0) | 
|  | # so that a zero-length copy returns immediately; every loop below tests its | 
|  | # counter only after the first transfer, so count must be non-zero there. | 
|  | or.p		gr8,gr9,gr4 | 
|  | orcc		gr10,gr0,gr0,icc3 | 
|  | or.p		gr10,gr4,gr4 | 
|  | beqlr		icc3,#0 | 
|  |  | 
|  | # optimise based on best common alignment for to, from & count | 
|  | # Each andicc masks the low bits of GR4 and keeps only the flags; the tests | 
|  | # alternate between ICC0 and ICC1 so the next test can be packed with the | 
|  | # branch that consumes the previous one.  GR11 is preset to 8 because both | 
|  | # memcpy_8 and memcpy_16 step by 8; memcpy_4 and memcpy_2 load their own step, | 
|  | # and GR11 = 1 is set last for the byte loop that is reached by fall-through. | 
|  | andicc.p	gr4,#0x0f,gr0,icc0 | 
|  | setlos		#8,gr11 | 
|  | andicc.p	gr4,#0x07,gr0,icc1 | 
|  | beq		icc0,#0,memcpy_16 | 
|  | andicc.p	gr4,#0x03,gr0,icc0 | 
|  | beq		icc1,#0,memcpy_8 | 
|  | andicc.p	gr4,#0x01,gr0,icc1 | 
|  | beq		icc0,#0,memcpy_4 | 
|  | setlos.p	#1,gr11 | 
|  | beq		icc1,#0,memcpy_2 | 
|  |  | 
|  | # do byte by byte copy | 
|  | # Both pointers are first moved back by one step: the update-form load and | 
|  | # store used in the loop address base + GR11 and advance the base register by | 
|  | # the same amount, so the first access lands on the original address.  GR3 is | 
|  | # used for the destination so that GR8 is left intact as the return value. | 
|  | sub.p		gr8,gr11,gr3 | 
|  | sub		gr9,gr11,gr9 | 
|  | 0:	ldubu.p		@(gr9,gr11),gr4 | 
|  | subicc		gr10,#1,gr10,icc0 | 
|  | stbu.p		gr4,@(gr3,gr11) | 
|  | bne		icc0,#2,0b | 
|  | # count has reached zero: return through LR | 
|  | bralr | 
|  |  | 
|  | # do halfword by halfword copy | 
|  | # Same loop shape as the byte copy, with a step of 2 (count is a multiple of | 
|  | # 2 on this path, so the countdown ends exactly on zero). | 
|  | memcpy_2: | 
|  | setlos		#2,gr11 | 
|  | sub.p		gr8,gr11,gr3 | 
|  | sub		gr9,gr11,gr9 | 
|  | 0:	lduhu.p		@(gr9,gr11),gr4 | 
|  | subicc		gr10,#2,gr10,icc0 | 
|  | sthu.p		gr4,@(gr3,gr11) | 
|  | bne		icc0,#2,0b | 
|  | bralr | 
|  |  | 
|  | # do word by word copy | 
|  | # Same loop shape again, with a step of 4. | 
|  | memcpy_4: | 
|  | setlos		#4,gr11 | 
|  | sub.p		gr8,gr11,gr3 | 
|  | sub		gr9,gr11,gr9 | 
|  | 0:	ldu.p		@(gr9,gr11),gr4 | 
|  | subicc		gr10,#4,gr10,icc0 | 
|  | stu.p		gr4,@(gr3,gr11) | 
|  | bne		icc0,#2,0b | 
|  | bralr | 
|  |  | 
|  | # do double-word by double-word copy | 
|  | # GR11 is still 8 from the alignment tests above, so no setlos is needed. | 
|  | memcpy_8: | 
|  | sub.p		gr8,gr11,gr3 | 
|  | sub		gr9,gr11,gr9 | 
|  | 0:	lddu.p		@(gr9,gr11),gr4 | 
|  | subicc		gr10,#8,gr10,icc0 | 
|  | stdu.p		gr4,@(gr3,gr11) | 
|  | bne		icc0,#2,0b | 
|  | bralr | 
|  |  | 
|  | # do quad-word by quad-word copy | 
|  | # Each iteration moves 16 bytes as two double-word transfers, still stepping | 
|  | # by GR11 = 8: two loads fill GR4 and GR6, then two stores write them | 
|  | # back in the same order. | 
|  | memcpy_16: | 
|  | sub.p		gr8,gr11,gr3 | 
|  | sub		gr9,gr11,gr9 | 
|  | 0:	lddu		@(gr9,gr11),gr4 | 
|  | lddu.p		@(gr9,gr11),gr6 | 
|  | subicc		gr10,#16,gr10,icc0 | 
|  | stdu		gr4,@(gr3,gr11) | 
|  | stdu.p		gr6,@(gr3,gr11) | 
|  | bne		icc0,#2,0b | 
|  | bralr | 
|  | # __memcpy_end marks the first address past memcpy's code; it is exported | 
|  | # alongside memcpy and used for the .size calculation below. | 
|  | __memcpy_end: | 
|  |  | 
|  | .size		memcpy, __memcpy_end-memcpy | 
|  |  | 
|  | ############################################################################### | 
|  | # | 
|  | # copy to/from userspace | 
|  | # - return the number of bytes that could not be copied (0 on complete success) | 
|  | # | 
|  | # long __memcpy_user(void *dst, const void *src, size_t count) | 
|  | # | 
|  | # The arguments are passed straight through to memcpy in GR8, GR9 and GR10. | 
|  | # An 8-byte stack frame is held for the duration of the copy: | 
|  | #   @(sp,#0) - dst + count, the expected end address | 
|  | #   @(sp,#4) - the caller's return address (LR on entry) | 
|  | # memcpy uses no stack, so this frame is still at SP if the copy is abandoned | 
|  | # part-way through. | 
|  | # | 
|  | # Three labels are exported: | 
|  | #   __memcpy_user               - the entry point | 
|  | #   __memcpy_user_error_lr      - the address the call to memcpy returns to | 
|  | #   __memcpy_user_error_handler - finishes the call after an aborted copy | 
|  | # NOTE(review): the code that diverts a faulting memcpy to | 
|  | # __memcpy_user_error_handler is not in this file; presumably the exception | 
|  | # fixup code identifies the copy by LR == __memcpy_user_error_lr - confirm. | 
|  | # | 
|  | ############################################################################### | 
|  | .globl		__memcpy_user, __memcpy_user_error_lr, __memcpy_user_error_handler | 
|  | .type		__memcpy_user,@function | 
|  | __memcpy_user: | 
|  | # Save the return address and the expected end address.  GR6 and GR7 are | 
|  | # stored as a pair by the single stdi: GR6 (dst + count) at @(sp,#0) and | 
|  | # GR7 (the old LR) at @(sp,#4).  The return address has to live on the stack | 
|  | # because memcpy's quad-word loop overwrites GR7. | 
|  | movsg		lr,gr7 | 
|  | subi.p		sp,#8,sp | 
|  | add		gr8,gr10,gr6		; calculate expected end address | 
|  | stdi		gr6,@(sp,#0) | 
|  |  | 
|  | # abuse memcpy to do the dirty work | 
|  | call		memcpy | 
|  | # Normal completion: reload the saved return address, return 0 in GR8 | 
|  | # (nothing left uncopied) and release the frame; the addi is packed with the | 
|  | # jmpl.p that performs the return. | 
|  | __memcpy_user_error_lr: | 
|  | ldi.p		@(sp,#4),gr7 | 
|  | setlos		#0,gr8 | 
|  | jmpl.p		@(gr7,gr0) | 
|  | addi		sp,#8,sp | 
|  |  | 
|  | # deal with any exception generated by memcpy | 
|  | # GR3 - memcpy's running dest pointer (one step behind the next store) | 
|  | # GR11 - memcpy's step value (index register for store insns) | 
|  | # GR8 is overwritten with the return value, the number of bytes not copied: | 
|  | #   GR7 = GR3 + GR11           the next destination address memcpy would | 
|  | #                              have stored to | 
|  | #   GR8 = (dst + count) - GR7  the bytes from there to the expected end | 
|  | # NOTE(review): this assumes a faulting update-form access leaves its base | 
|  | # register unmodified - confirm against the FR-V exception model. | 
|  | __memcpy_user_error_handler: | 
|  | lddi.p		@(sp,#0),gr4		; load GR4 with dst+count, GR5 with ret addr | 
|  | add		gr11,gr3,gr7 | 
|  | sub.p		gr4,gr7,gr8 | 
|  |  | 
|  | # release the frame and return to __memcpy_user's caller | 
|  | addi		sp,#8,sp | 
|  | jmpl		@(gr5,gr0) | 
|  |  | 
|  | .size		__memcpy_user, .-__memcpy_user | 