| /* SPDX-License-Identifier: GPL-2.0 */ |
| /* |
| * Barely optimized memmove routine for sw64. |
| * This is hand-massaged output from the original memcpy.c. We defer to |
| * memcpy whenever possible; the backwards copy loops are not unrolled. |
| */ |
| #include <asm/export.h> |
| .set noat |
| .set noreorder |
| .text |
| /* |
| * memmove: copy n bytes from src to dest, coping with overlapping regions. |
| * |
| * In:      $16 = dest, $17 = src, $18 = n (byte count). |
| *          $27 is the base that ldgp uses to set up $29. |
| * Out:     $0 = the original dest value. Returns through $26. |
| * Scratch: $1 to $6 and $18 are overwritten. $16 and $17 are only read. |
| * |
| * Strategy: when the two regions are disjoint, branch to memcpy. Otherwise |
| * copy upwards from the start when dest <= src, or downwards from the end |
| * when dest > src, so that source bytes are read before they are |
| * overwritten. In either direction, when dest and src share the same |
| * offset modulo 8 the copy runs as: single bytes until the destination |
| * pointer is 8-byte aligned, then 8-byte words, then a byte tail. When they |
| * do not share it, every byte is copied individually. |
| */ |
| .align 4 |
| .globl memmove |
| .ent memmove |
| memmove: |
| ldgp $29, 0($27) |
| unop |
| .prologue 1 |
| /* Overlap test on the one-past-the-end pointers, using unsigned compares. */ |
| addl $16, $18, $4 /* $4 = dest + n */ |
| addl $17, $18, $5 /* $5 = src + n */ |
| cmpule $4, $17, $1 # dest + n <= src |
| cmpule $5, $16, $2 # dest >= src + n |
| /* Either comparison being true means the regions do not overlap. */ |
| bis $1, $2, $1 /* $1 = nonzero when the regions are disjoint */ |
| mov $16, $0 /* $0 = dest, set before any branch is taken */ |
| xor $16, $17, $2 /* $2 = dest ^ src, masked to its low 3 bits below */ |
| bne $1, memcpy # samegp |
| /* |
| * Overlapping regions from here on. $2 becomes zero only when dest and src |
| * have the same offset within an 8-byte word. The upward branch is taken |
| * before $1 is recomputed, so $memmove_up sees $1 = dest & 7. |
| */ |
| and $2, 7, $2 # Test for src/dest co-alignment. |
| and $16, 7, $1 /* $1 = dest & 7 */ |
| cmpule $16, $17, $3 /* $3 = (dest <= src), unsigned */ |
| bne $3, $memmove_up # dest <= src |
| /* |
| * Downward copy (dest > src): $4 and $5 start one past the end of each |
| * region and move towards lower addresses. |
| */ |
| and $4, 7, $1 /* $1 = (dest + n) & 7, misalignment of the end pointer */ |
| bne $2, $misaligned_dn |
| unop |
| beq $1, $skip_aligned_byte_loop_head_dn |
| /* Head: copy single bytes downwards until $4 is 8-byte aligned. */ |
| $aligned_byte_loop_head_dn: |
| ldi $4, -1($4) |
| ldi $5, -1($5) |
| unop |
| ble $18, $egress /* count ran out before alignment was reached */ |
| |
| ldbu $1, 0($5) |
| ldi $18, -1($18) |
| stb $1, 0($4) |
| |
| and $4, 7, $6 |
| bne $6, $aligned_byte_loop_head_dn /* loop while $4 is not aligned */ |
| /* |
| * $4 is 8-byte aligned here, and $5 too since the pointers are co-aligned. |
| * The count is biased by -8 so that its sign tells whether a whole word |
| * is still left to copy. |
| */ |
| $skip_aligned_byte_loop_head_dn: |
| ldi $18, -8($18) |
| blt $18, $skip_aligned_word_loop_dn |
| /* Word loop: copy 8 bytes per iteration, moving both pointers down by 8. */ |
| $aligned_word_loop_dn: |
| ldl $1, -8($5) |
| ldi $5, -8($5) |
| ldi $18, -8($18) |
| |
| stl $1, -8($4) |
| ldi $4, -8($4) |
| bge $18, $aligned_word_loop_dn |
| /* Undo the -8 bias. Any remaining bytes (fewer than 8) go to the tail. */ |
| $skip_aligned_word_loop_dn: |
| ldi $18, 8($18) |
| bgt $18, $byte_loop_tail_dn |
| unop |
| ret $31, ($26), 1 /* nothing left to copy */ |
| /* |
| * Not co-aligned: copy all n bytes downwards one at a time. The fnop and |
| * unop below do no copying work, presumably scheduling padding. |
| * TODO confirm. |
| */ |
| .align 4 |
| $misaligned_dn: |
| fnop |
| unop |
| beq $18, $egress |
| /* |
| * Downward byte loop, shared by the misaligned path and the aligned tail. |
| * It copies at least one byte, so both entry points check the count first. |
| */ |
| $byte_loop_tail_dn: |
| ldbu $1, -1($5) |
| ldi $5, -1($5) |
| ldi $4, -1($4) |
| |
| ldi $18, -1($18) |
| stb $1, 0($4) |
| |
| bgt $18, $byte_loop_tail_dn |
| br $egress |
| /* |
| * Upward copy (dest <= src): $4 and $5 start at dest and src and move |
| * towards higher addresses. $2 still holds the co-alignment test result |
| * and $1 still holds dest & 7. |
| */ |
| $memmove_up: |
| mov $16, $4 |
| mov $17, $5 |
| bne $2, $misaligned_up |
| beq $1, $skip_aligned_byte_loop_head_up |
| /* Head: copy single bytes upwards until $4 is 8-byte aligned. */ |
| $aligned_byte_loop_head_up: |
| unop |
| ble $18, $egress /* count ran out before alignment was reached */ |
| ldbu $1, 0($5) |
| |
| ldi $18, -1($18) |
| |
| ldi $5, 1($5) |
| stb $1, 0($4) |
| ldi $4, 1($4) |
| |
| and $4, 7, $6 |
| bne $6, $aligned_byte_loop_head_up /* loop while $4 is not aligned */ |
| /* |
| * $4 is 8-byte aligned here, and $5 too since the pointers are co-aligned. |
| * The count is biased by -8 so that its sign tells whether a whole word |
| * is still left to copy. |
| */ |
| $skip_aligned_byte_loop_head_up: |
| ldi $18, -8($18) |
| blt $18, $skip_aligned_word_loop_up |
| /* Word loop: copy 8 bytes per iteration, moving both pointers up by 8. */ |
| $aligned_word_loop_up: |
| ldl $1, 0($5) |
| ldi $5, 8($5) |
| ldi $18, -8($18) |
| |
| stl $1, 0($4) |
| ldi $4, 8($4) |
| bge $18, $aligned_word_loop_up |
| /* Undo the -8 bias. Any remaining bytes (fewer than 8) go to the tail. */ |
| $skip_aligned_word_loop_up: |
| ldi $18, 8($18) |
| bgt $18, $byte_loop_tail_up |
| unop |
| ret $31, ($26), 1 /* nothing left to copy */ |
| /* |
| * Not co-aligned: copy all n bytes upwards one at a time. The fnop and |
| * unop below do no copying work, presumably scheduling padding. |
| * TODO confirm. |
| */ |
| .align 4 |
| $misaligned_up: |
| fnop |
| unop |
| beq $18, $egress |
| /* |
| * Upward byte loop, shared by the misaligned path and the aligned tail. |
| * It copies at least one byte and falls through into $egress when done. |
| */ |
| $byte_loop_tail_up: |
| ldbu $1, 0($5) |
| ldi $18, -1($18) |
| |
| stb $1, 0($4) |
| |
| ldi $5, 1($5) |
| ldi $4, 1($4) |
| bgt $18, $byte_loop_tail_up |
| /* Common exit. $0 already holds dest from the entry sequence. */ |
| $egress: |
| ret $31, ($26), 1 |
| |
| .end memmove |
| EXPORT_SYMBOL(memmove) |