| /* |
| * Copyright (c) 2011-2014, Intel Corporation |
| * Authors: Fenghua Yu <fenghua.yu@intel.com>, |
| * H. Peter Anvin <hpa@linux.intel.com> |
| * |
| * This program is free software; you can redistribute it and/or modify it |
| * under the terms and conditions of the GNU General Public License, |
| * version 2, as published by the Free Software Foundation. |
| * |
| * This program is distributed in the hope it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
| * more details. |
| * |
| * You should have received a copy of the GNU General Public License along with |
| * this program; if not, write to the Free Software Foundation, Inc., |
| * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| */ |
| |
| #if defined(__i386__) || defined(__x86_64__) |
| |
| /* |
| * ENTRY(x): align the current location to a 64-byte boundary, export the |
| * symbol x and define its label there. |
| */ |
| #define ENTRY(x) \ |
| .balign 64 ; \ |
| .globl x ; \ |
| x: |
| |
| /* |
| * ENDPROC(x): record the size of x as the distance from its label to the |
| * current location, and mark x as a function symbol. |
| */ |
| #define ENDPROC(x) \ |
| .size x, .-x ; \ |
| .type x, @function |
| |
| /* |
| * Number of failed hardware RNG attempts in a row that the routines below |
| * tolerate before they stop and return what they have produced so far. |
| * The counter is reloaded from this value after every successful store. |
| */ |
| #define RDRAND_RETRY_LIMIT 10 |
| |
| #ifdef __x86_64__ |
| |
| /* |
| * x86_rdrand_bytes (x86-64) |
| * |
| * In: %rdi = destination pointer, %esi = byte count |
| * Out: %eax = initial count minus the count still outstanding on exit |
| * Uses: %rcx (retry counter), %rdx (RDRAND result), flags |
| * |
| * Stores one 64-bit RDRAND result per iteration. At least one word is |
| * attempted before the count is checked. |
| */ |
| ENTRY(x86_rdrand_bytes) |
| movl %esi, %eax /* keep the requested count for the return value */ |
| .Lrdrand64_next_word: |
| movl $RDRAND_RETRY_LIMIT, %ecx /* fresh retry budget for every word */ |
| .Lrdrand64_attempt: |
| .byte 0x48,0x0f,0xc7,0xf2 /* rdrand %rdx */ |
| jnc .Lrdrand64_failed /* CF clear: no random value delivered */ |
| movq %rdx, (%rdi) |
| addq $8, %rdi |
| subl $8, %esi |
| ja .Lrdrand64_next_word /* loop while the count is still above zero */ |
| .Lrdrand64_done: |
| subl %esi, %eax /* requested - outstanding */ |
| ret |
| .Lrdrand64_failed: |
| decl %ecx |
| rep;nop /* leaves the flags set by decl untouched */ |
| jnz .Lrdrand64_attempt /* retries left: try the same word again */ |
| jmp .Lrdrand64_done /* budget exhausted: return a short count */ |
| ENDPROC(x86_rdrand_bytes) |
| |
| /* |
| * x86_rdseed_or_rdrand_bytes (x86-64) |
| * |
| * In: %rdi = RDSEED output pointer, %rsi = pointer to RDSEED byte count |
| * %rdx = RDRAND output pointer, %rcx = pointer to RDRAND byte count |
| * Out: each pointed-to count is reduced by the amount still outstanding |
| * for that source when the routine stops |
| * Uses: %rax, %r8, %r9, %r10, flags (and advances %rdi / %rdx) |
| * |
| * Every iteration tries RDSEED first; if that fails, one RDRAND word is |
| * stored to the RDRAND buffer instead. The routine stops when either |
| * count is used up or when both instructions keep failing. |
| */ |
| ENTRY(x86_rdseed_or_rdrand_bytes) |
| movl (%rsi), %r8d /* RDSEED bytes still wanted */ |
| movl (%rcx), %r9d /* RDRAND bytes still wanted */ |
| .Lseed64_next_word: |
| movl $RDRAND_RETRY_LIMIT, %r10d /* reset retry budget after each store */ |
| .Lseed64_try_rdseed: |
| .byte 0x48,0x0f,0xc7,0xf8 /* rdseed %rax */ |
| jnc .Lseed64_try_rdrand /* no seed available: fall back to RDRAND */ |
| movq %rax, (%rdi) |
| addq $8, %rdi |
| subl $8, %r8d |
| ja .Lseed64_next_word /* more RDSEED bytes wanted */ |
| .Lseed64_done: |
| subl %r8d, (%rsi) /* RDSEED count -= outstanding */ |
| subl %r9d, (%rcx) /* RDRAND count -= outstanding */ |
| ret |
| .Lseed64_try_rdrand: |
| .byte 0x48,0x0f,0xc7,0xf0 /* rdrand %rax */ |
| jnc .Lseed64_both_failed |
| movq %rax, (%rdx) |
| addq $8, %rdx |
| subl $8, %r9d |
| ja .Lseed64_next_word /* RDRAND bytes still wanted: go around again */ |
| jmp .Lseed64_done /* RDRAND request used up: stop */ |
| .Lseed64_both_failed: |
| decl %r10d |
| rep;nop /* leaves the flags set by decl untouched */ |
| jnz .Lseed64_try_rdseed /* retries left */ |
| jmp .Lseed64_done /* give up */ |
| ENDPROC(x86_rdseed_or_rdrand_bytes) |
| |
| /* |
| * Register roles for the AES routines further down, x86-64 flavour. |
| * PTR0 and PTR1 are the registers those routines read their two pointer |
| * operands from; PTR2 and CTR3 are scratch (round-key pointer and loop |
| * counter in x86_aes_mangle). |
| */ |
| #define SETPTR(var,ptr) leaq var(%rip),ptr /* RIP-relative address load */ |
| #define PTR0 %rdi |
| #define PTR1 %rsi |
| #define PTR2 %rcx |
| #define CTR3 %eax |
| /* Register number of PTR2, for hand-built ModRM bytes */ |
| #define NPTR2 1 /* %rcx = %r1, only 0-7 valid here */ |
| |
| #elif defined(__i386__) |
| |
| /* |
| * x86_rdrand_bytes (i386) |
| * |
| * In: 8(%ebp) = destination pointer, 12(%ebp) = byte count |
| * Out: %eax = initial count minus the count still outstanding on exit |
| * Uses: %ecx (retry counter), %edx (RDRAND result), flags; |
| * %edi, %esi and %ebp are saved and restored |
| * |
| * Stores one 32-bit RDRAND result per iteration. At least one word is |
| * attempted before the count is checked. |
| */ |
| ENTRY(x86_rdrand_bytes) |
| pushl %ebp |
| movl %esp, %ebp |
| pushl %edi |
| pushl %esi |
| movl 12(%ebp), %esi /* byte count */ |
| movl 8(%ebp), %edi /* destination */ |
| |
| movl %esi, %eax /* keep the requested count for the return value */ |
| .Lrdrand32_next_word: |
| movl $RDRAND_RETRY_LIMIT, %ecx /* fresh retry budget for every word */ |
| .Lrdrand32_attempt: |
| .byte 0x0f,0xc7,0xf2 /* rdrand %edx */ |
| jnc .Lrdrand32_failed /* CF clear: no random value delivered */ |
| movl %edx, (%edi) |
| addl $4, %edi |
| subl $4, %esi |
| ja .Lrdrand32_next_word /* loop while the count is still above zero */ |
| .Lrdrand32_done: |
| subl %esi, %eax /* requested - outstanding */ |
| popl %esi |
| popl %edi |
| popl %ebp |
| ret |
| .Lrdrand32_failed: |
| decl %ecx |
| rep;nop /* leaves the flags set by decl untouched */ |
| jnz .Lrdrand32_attempt /* retries left: try the same word again */ |
| jmp .Lrdrand32_done /* budget exhausted: return a short count */ |
| ENDPROC(x86_rdrand_bytes) |
| |
| |
| /* |
| * x86_rdseed_or_rdrand_bytes (i386) |
| * |
| * In: 8(%ebp) = RDSEED output pointer |
| * 12(%ebp) = pointer to RDSEED byte count |
| * 16(%ebp) = RDRAND output pointer |
| * 20(%ebp) = pointer to RDRAND byte count |
| * Out: each pointed-to count is reduced by the amount still outstanding |
| * for that source when the routine stops |
| * Uses: %eax, %ecx, %edx, flags; %ebx, %esi, %edi, %ebp are saved and |
| * restored |
| * |
| * Every iteration tries RDSEED first; if that fails, one RDRAND word is |
| * stored to the RDRAND buffer instead. The routine stops when either |
| * count is used up or when both instructions keep failing. |
| */ |
| ENTRY(x86_rdseed_or_rdrand_bytes) |
| push %ebp |
| mov %esp, %ebp |
| push %edi |
| push %esi |
| push %ebx |
| |
| mov 12(%ebp), %ebx |
| mov 20(%ebp), %esi |
| mov 8(%ebp), %edi /* RDSEED pointer */ |
| mov 16(%ebp), %edx /* RDRAND pointer */ |
| mov (%ebx), %ebx /* RDSEED count */ |
| mov (%esi), %esi /* RDRAND count */ |
| 1: |
| mov $RDRAND_RETRY_LIMIT, %ecx /* reset retry budget after each store */ |
| 2: |
| .byte 0x0f,0xc7,0xf8 /* rdseed %eax */ |
| jnc 3f /* no seed available: fall back to RDRAND */ |
| mov %eax, (%edi) |
| add $4, %edi |
| sub $4, %ebx |
| ja 1b /* more RDSEED bytes wanted */ |
| 4: |
| /* %edx is free again here: reload the two count pointers from the frame */ |
| mov 12(%ebp), %edx |
| mov 20(%ebp), %eax |
| sub %ebx, (%edx) /* RDSEED count */ |
| sub %esi, (%eax) /* RDRAND count */ |
| |
| pop %ebx |
| pop %esi |
| pop %edi |
| pop %ebp |
| ret |
| 3: |
| .byte 0x0f,0xc7,0xf0 /* rdrand %eax */ |
| jnc 5f |
| mov %eax, (%edx) |
| add $4, %edx |
| sub $4, %esi |
| /* |
| * Go around again only while RDRAND bytes are still wanted (unsigned |
| * count above zero). Once that request is used up we must leave: |
| * falling into the retry path would store further RDRAND words past |
| * the end of the caller's buffer. Same logic as the x86-64 version. |
| */ |
| ja 1b |
| jmp 4b |
| 5: |
| dec %ecx |
| rep;nop /* leaves the flags set by dec untouched */ |
| jnz 2b /* retries left */ |
| jmp 4b /* give up */ |
| ENDPROC(x86_rdseed_or_rdrand_bytes) |
| |
| /* |
| * Register roles for the AES routines further down, i386 flavour. |
| * PTR0 and PTR1 are the registers those routines load their two stack |
| * arguments into; PTR2 and CTR3 are scratch (round-key pointer and loop |
| * counter in x86_aes_mangle). |
| */ |
| #define SETPTR(var,ptr) movl $(var),ptr /* absolute address load */ |
| #define PTR0 %eax |
| #define PTR1 %edx |
| #define PTR2 %ecx |
| #define CTR3 %esi |
| /* Register number of PTR2, added into the hand-built ModRM bytes */ |
| #define NPTR2 1 /* %ecx = register number 1 */ |
| |
| #endif |
| |
| /* |
| * x86_aes_mangle |
| * |
| * In: PTR0 = input data; read as 8 lanes spaced 8192 bytes apart, with |
| * 512 consecutive 16-byte blocks consumed from each lane |
| * PTR1 = 8 x 16-byte state, loaded and stored with movdqa (so it has |
| * to be 16-byte aligned) |
| * (x86-64: %rdi / %rsi on entry; i386: loaded from 8(%ebp) / 12(%ebp)) |
| * Out: the state at PTR1 is overwritten with the result |
| * Uses: PTR2, CTR3, %xmm0-%xmm7 (plus %xmm8 on x86-64), flags; PTR0 is |
| * advanced. On i386 %esi and %ebp are saved and restored. |
| * |
| * Each of the 512 rounds XORs one input block per lane into the state and |
| * then runs ten aesenc steps plus one aesenclast step over all eight state |
| * registers, using the 11 entries of aes_round_keys. |
| */ |
| ENTRY(x86_aes_mangle) |
| #ifdef __i386__ |
| push %ebp |
| mov %esp, %ebp |
| movl 8(%ebp), %eax |
| movl 12(%ebp), %edx |
| push %esi |
| #endif |
| movl $512, CTR3 /* Number of rounds */ |
| |
| /* Load the eight 128-bit state words */ |
| movdqa (0*16)(PTR1), %xmm0 |
| movdqa (1*16)(PTR1), %xmm1 |
| movdqa (2*16)(PTR1), %xmm2 |
| movdqa (3*16)(PTR1), %xmm3 |
| movdqa (4*16)(PTR1), %xmm4 |
| movdqa (5*16)(PTR1), %xmm5 |
| movdqa (6*16)(PTR1), %xmm6 |
| movdqa (7*16)(PTR1), %xmm7 |
| |
| /* |
| * x86-64 addresses the round keys as offset(PTR2), so PTR2 never changes |
| * and is set once outside the loop. i386 advances PTR2 through the key |
| * table, so it has to be reset at the top of every round. |
| */ |
| #ifdef __x86_64__ |
| SETPTR(aes_round_keys, PTR2) |
| 1: |
| #else |
| 1: |
| SETPTR(aes_round_keys, PTR2) |
| #endif |
| |
| /* 8192 = 512 (rounds) * 16 (bytes) */ |
| pxor (0*8192)(PTR0), %xmm0 |
| pxor (1*8192)(PTR0), %xmm1 |
| pxor (2*8192)(PTR0), %xmm2 |
| pxor (3*8192)(PTR0), %xmm3 |
| pxor (4*8192)(PTR0), %xmm4 |
| pxor (5*8192)(PTR0), %xmm5 |
| pxor (6*8192)(PTR0), %xmm6 |
| pxor (7*8192)(PTR0), %xmm7 |
| add $16, PTR0 |
| |
| /* Ten aesenc steps, using round keys 0..9 */ |
| offset = 0 |
| .rept 10 |
| #ifdef __x86_64__ |
| movdqa offset(PTR2), %xmm8 |
| offset = offset + 16 |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xc0 /* aesenc %xmm8, %xmm0 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xc8 /* aesenc %xmm8, %xmm1 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xd0 /* aesenc %xmm8, %xmm2 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xd8 /* aesenc %xmm8, %xmm3 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xe0 /* aesenc %xmm8, %xmm4 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xe8 /* aesenc %xmm8, %xmm5 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xf0 /* aesenc %xmm8, %xmm6 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdc,0xf8 /* aesenc %xmm8, %xmm7 */ |
| #else |
| .byte 0x66,0x0f,0x38,0xdc,0x00+NPTR2 /* aesenc (PTR2), %xmm0 */ |
| .byte 0x66,0x0f,0x38,0xdc,0x08+NPTR2 /* aesenc (PTR2), %xmm1 */ |
| .byte 0x66,0x0f,0x38,0xdc,0x10+NPTR2 /* aesenc (PTR2), %xmm2 */ |
| .byte 0x66,0x0f,0x38,0xdc,0x18+NPTR2 /* aesenc (PTR2), %xmm3 */ |
| .byte 0x66,0x0f,0x38,0xdc,0x20+NPTR2 /* aesenc (PTR2), %xmm4 */ |
| .byte 0x66,0x0f,0x38,0xdc,0x28+NPTR2 /* aesenc (PTR2), %xmm5 */ |
| .byte 0x66,0x0f,0x38,0xdc,0x30+NPTR2 /* aesenc (PTR2), %xmm6 */ |
| .byte 0x66,0x0f,0x38,0xdc,0x38+NPTR2 /* aesenc (PTR2), %xmm7 */ |
| add $16, PTR2 |
| #endif |
| .endr |
| |
| /* Final step with round key 10 (offset is 160 here; PTR2 has advanced by 160 on i386) */ |
| #ifdef __x86_64__ |
| movdqa offset(PTR2), %xmm8 |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xc0 /* aesenclast %xmm8, %xmm0 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xc8 /* aesenclast %xmm8, %xmm1 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xd0 /* aesenclast %xmm8, %xmm2 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xd8 /* aesenclast %xmm8, %xmm3 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xe0 /* aesenclast %xmm8, %xmm4 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xe8 /* aesenclast %xmm8, %xmm5 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xf0 /* aesenclast %xmm8, %xmm6 */ |
| .byte 0x66,0x41,0x0f,0x38,0xdd,0xf8 /* aesenclast %xmm8, %xmm7 */ |
| #else |
| .byte 0x66,0x0f,0x38,0xdd,0x00+NPTR2 /* aesenclast (PTR2), %xmm0 */ |
| .byte 0x66,0x0f,0x38,0xdd,0x08+NPTR2 /* aesenclast (PTR2), %xmm1 */ |
| .byte 0x66,0x0f,0x38,0xdd,0x10+NPTR2 /* aesenclast (PTR2), %xmm2 */ |
| .byte 0x66,0x0f,0x38,0xdd,0x18+NPTR2 /* aesenclast (PTR2), %xmm3 */ |
| .byte 0x66,0x0f,0x38,0xdd,0x20+NPTR2 /* aesenclast (PTR2), %xmm4 */ |
| .byte 0x66,0x0f,0x38,0xdd,0x28+NPTR2 /* aesenclast (PTR2), %xmm5 */ |
| .byte 0x66,0x0f,0x38,0xdd,0x30+NPTR2 /* aesenclast (PTR2), %xmm6 */ |
| .byte 0x66,0x0f,0x38,0xdd,0x38+NPTR2 /* aesenclast (PTR2), %xmm7 */ |
| #endif |
| sub $1, CTR3 |
| jnz 1b /* next round until the counter reaches zero */ |
| |
| /* Write the eight state words back */ |
| movdqa %xmm0, (0*16)(PTR1) |
| movdqa %xmm1, (1*16)(PTR1) |
| movdqa %xmm2, (2*16)(PTR1) |
| movdqa %xmm3, (3*16)(PTR1) |
| movdqa %xmm4, (4*16)(PTR1) |
| movdqa %xmm5, (5*16)(PTR1) |
| movdqa %xmm6, (6*16)(PTR1) |
| movdqa %xmm7, (7*16)(PTR1) |
| |
| #ifdef __i386__ |
| pop %esi |
| pop %ebp |
| #endif |
| ret |
| ENDPROC(x86_aes_mangle) |
| |
| /* Hand-encoded: aeskeygenassist $imm,%xmm0,%xmm1 */ |
| #define AESKEYGENASSIST(imm) .byte 0x66,0x0f,0x3a,0xdf,0xc8,imm |
| |
| /* |
| * x86_aes_expand_key |
| * |
| * In: PTR0 = 16-byte key, read with an unaligned load |
| * (x86-64: %rdi on entry; i386: loaded from 8(%ebp)) |
| * Out: aes_round_keys[0] holds the key as given, and entries 1..10 hold |
| * the ten values derived from it by the steps below |
| * Uses: PTR1 (write cursor into aes_round_keys), %xmm0-%xmm2, flags |
| */ |
| ENTRY(x86_aes_expand_key) |
| #ifdef __i386__ |
| pushl %ebp |
| movl %esp, %ebp |
| movl 8(%ebp), %eax |
| #endif |
| |
| SETPTR(aes_round_keys, PTR1) |
| movdqu (PTR0), %xmm0 |
| movdqa %xmm0, (PTR1) /* slot 0 is the key exactly as supplied */ |
| add $16, PTR1 |
| |
| /* One assist + mixing step per derived round key; immediates are the round constants */ |
| AESKEYGENASSIST(0x01) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x02) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x04) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x08) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x10) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x20) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x40) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x80) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x1b) |
| call .Laes_expand_step |
| AESKEYGENASSIST(0x36) |
| call .Laes_expand_step |
| |
| #ifdef __i386__ |
| popl %ebp |
| #endif |
| ret |
| |
| /* |
| * Local helper: combine the previous round key (%xmm0) with the assist |
| * value (%xmm1), leave the new round key in %xmm0 and append it at PTR1. |
| */ |
| .Laes_expand_step: |
| movdqa %xmm0, %xmm2 |
| pslldq $4, %xmm2 |
| pxor %xmm2, %xmm0 |
| pslldq $4, %xmm2 |
| pxor %xmm2, %xmm0 |
| pslldq $4, %xmm2 |
| pxor %xmm2, %xmm0 |
| pshufd $0xff, %xmm1, %xmm1 /* replicate the top dword of the assist value */ |
| pxor %xmm1, %xmm0 |
| movdqa %xmm0, (PTR1) |
| add $16, PTR1 |
| ret |
| |
| ENDPROC(x86_aes_expand_key) |
| |
| /* |
| * Storage for the 11 16-byte round keys written by x86_aes_expand_key and |
| * read by x86_aes_mangle. Kept in .bss and aligned to 64 bytes, which also |
| * satisfies the 16-byte alignment needed by the movdqa accesses. |
| */ |
| .bss |
| .balign 64 |
| aes_round_keys: |
| .space 11*16 |
| .size aes_round_keys, .-aes_round_keys |
| |
| #endif /* i386 or x86_64 */ |
| |
| /* |
| * This note marks the object as not needing an executable stack; |
| * without it the whole executable may end up with an executable stack. |
| */ |
| .section .note.GNU-stack,"",%progbits |