| From e478d121fa3504e65776ee03ac145d50a166fe9d Mon Sep 17 00:00:00 2001 |
| From: Ard Biesheuvel <ard.biesheuvel@linaro.org> |
| Date: Tue, 11 Oct 2016 19:15:17 +0100 |
| Subject: [PATCH] crypto: arm64/aes-ccm-ce: fix for big endian |
| |
| commit 56e4e76c68fcb51547b5299e5b66a135935ff414 upstream. |
| |
| The AES-CCM implementation that uses ARMv8 Crypto Extensions instructions |
| refers to the AES round keys as pairs of 64-bit quantities, which causes |
| failures when building the code for big endian. In addition, it byte swaps |
| the input counter unconditionally, while this is only required for little |
| endian builds. So fix both issues. |
| |
| Fixes: 12ac3efe74f8 ("arm64/crypto: use crypto instructions to generate AES key schedule") |
| Signed-off-by: Ard Biesheuvel <ard.biesheuvel@linaro.org> |
| Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au> |
| Signed-off-by: Paul Gortmaker <paul.gortmaker@windriver.com> |
| |
| diff --git a/arch/arm64/crypto/aes-ce-ccm-core.S b/arch/arm64/crypto/aes-ce-ccm-core.S |
| index a2a7fbcacc14..3363560c79b7 100644 |
| --- a/arch/arm64/crypto/aes-ce-ccm-core.S |
| +++ b/arch/arm64/crypto/aes-ce-ccm-core.S |
| @@ -9,6 +9,7 @@ |
| */ |
| |
| #include <linux/linkage.h> |
| +#include <asm/assembler.h> |
| |
| .text |
| .arch armv8-a+crypto |
| @@ -19,7 +20,7 @@ |
| */ |
| ENTRY(ce_aes_ccm_auth_data) |
| ldr w8, [x3] /* leftover from prev round? */ |
| - ld1 {v0.2d}, [x0] /* load mac */ |
| + ld1 {v0.16b}, [x0] /* load mac */ |
| cbz w8, 1f |
| sub w8, w8, #16 |
| eor v1.16b, v1.16b, v1.16b |
| @@ -31,7 +32,7 @@ ENTRY(ce_aes_ccm_auth_data) |
| beq 8f /* out of input? */ |
| cbnz w8, 0b |
| eor v0.16b, v0.16b, v1.16b |
| -1: ld1 {v3.2d}, [x4] /* load first round key */ |
| +1: ld1 {v3.16b}, [x4] /* load first round key */ |
| prfm pldl1strm, [x1] |
| cmp w5, #12 /* which key size? */ |
| add x6, x4, #16 |
| @@ -41,17 +42,17 @@ ENTRY(ce_aes_ccm_auth_data) |
| mov v5.16b, v3.16b |
| b 4f |
| 2: mov v4.16b, v3.16b |
| - ld1 {v5.2d}, [x6], #16 /* load 2nd round key */ |
| + ld1 {v5.16b}, [x6], #16 /* load 2nd round key */ |
| 3: aese v0.16b, v4.16b |
| aesmc v0.16b, v0.16b |
| -4: ld1 {v3.2d}, [x6], #16 /* load next round key */ |
| +4: ld1 {v3.16b}, [x6], #16 /* load next round key */ |
| aese v0.16b, v5.16b |
| aesmc v0.16b, v0.16b |
| -5: ld1 {v4.2d}, [x6], #16 /* load next round key */ |
| +5: ld1 {v4.16b}, [x6], #16 /* load next round key */ |
| subs w7, w7, #3 |
| aese v0.16b, v3.16b |
| aesmc v0.16b, v0.16b |
| - ld1 {v5.2d}, [x6], #16 /* load next round key */ |
| + ld1 {v5.16b}, [x6], #16 /* load next round key */ |
| bpl 3b |
| aese v0.16b, v4.16b |
| subs w2, w2, #16 /* last data? */ |
| @@ -60,7 +61,7 @@ ENTRY(ce_aes_ccm_auth_data) |
| ld1 {v1.16b}, [x1], #16 /* load next input block */ |
| eor v0.16b, v0.16b, v1.16b /* xor with mac */ |
| bne 1b |
| -6: st1 {v0.2d}, [x0] /* store mac */ |
| +6: st1 {v0.16b}, [x0] /* store mac */ |
| beq 10f |
| adds w2, w2, #16 |
| beq 10f |
| @@ -79,7 +80,7 @@ ENTRY(ce_aes_ccm_auth_data) |
| adds w7, w7, #1 |
| bne 9b |
| eor v0.16b, v0.16b, v1.16b |
| - st1 {v0.2d}, [x0] |
| + st1 {v0.16b}, [x0] |
| 10: str w8, [x3] |
| ret |
| ENDPROC(ce_aes_ccm_auth_data) |
| @@ -89,27 +90,27 @@ ENDPROC(ce_aes_ccm_auth_data) |
| * u32 rounds); |
| */ |
| ENTRY(ce_aes_ccm_final) |
| - ld1 {v3.2d}, [x2], #16 /* load first round key */ |
| - ld1 {v0.2d}, [x0] /* load mac */ |
| + ld1 {v3.16b}, [x2], #16 /* load first round key */ |
| + ld1 {v0.16b}, [x0] /* load mac */ |
| cmp w3, #12 /* which key size? */ |
| sub w3, w3, #2 /* modified # of rounds */ |
| - ld1 {v1.2d}, [x1] /* load 1st ctriv */ |
| + ld1 {v1.16b}, [x1] /* load 1st ctriv */ |
| bmi 0f |
| bne 3f |
| mov v5.16b, v3.16b |
| b 2f |
| 0: mov v4.16b, v3.16b |
| -1: ld1 {v5.2d}, [x2], #16 /* load next round key */ |
| +1: ld1 {v5.16b}, [x2], #16 /* load next round key */ |
| aese v0.16b, v4.16b |
| aesmc v0.16b, v0.16b |
| aese v1.16b, v4.16b |
| aesmc v1.16b, v1.16b |
| -2: ld1 {v3.2d}, [x2], #16 /* load next round key */ |
| +2: ld1 {v3.16b}, [x2], #16 /* load next round key */ |
| aese v0.16b, v5.16b |
| aesmc v0.16b, v0.16b |
| aese v1.16b, v5.16b |
| aesmc v1.16b, v1.16b |
| -3: ld1 {v4.2d}, [x2], #16 /* load next round key */ |
| +3: ld1 {v4.16b}, [x2], #16 /* load next round key */ |
| subs w3, w3, #3 |
| aese v0.16b, v3.16b |
| aesmc v0.16b, v0.16b |
| @@ -120,47 +121,47 @@ ENTRY(ce_aes_ccm_final) |
| aese v1.16b, v4.16b |
| /* final round key cancels out */ |
| eor v0.16b, v0.16b, v1.16b /* en-/decrypt the mac */ |
| - st1 {v0.2d}, [x0] /* store result */ |
| + st1 {v0.16b}, [x0] /* store result */ |
| ret |
| ENDPROC(ce_aes_ccm_final) |
| |
| .macro aes_ccm_do_crypt,enc |
| ldr x8, [x6, #8] /* load lower ctr */ |
| - ld1 {v0.2d}, [x5] /* load mac */ |
| - rev x8, x8 /* keep swabbed ctr in reg */ |
| + ld1 {v0.16b}, [x5] /* load mac */ |
| +CPU_LE( rev x8, x8 ) /* keep swabbed ctr in reg */ |
| 0: /* outer loop */ |
| - ld1 {v1.1d}, [x6] /* load upper ctr */ |
| + ld1 {v1.8b}, [x6] /* load upper ctr */ |
| prfm pldl1strm, [x1] |
| add x8, x8, #1 |
| rev x9, x8 |
| cmp w4, #12 /* which key size? */ |
| sub w7, w4, #2 /* get modified # of rounds */ |
| ins v1.d[1], x9 /* no carry in lower ctr */ |
| - ld1 {v3.2d}, [x3] /* load first round key */ |
| + ld1 {v3.16b}, [x3] /* load first round key */ |
| add x10, x3, #16 |
| bmi 1f |
| bne 4f |
| mov v5.16b, v3.16b |
| b 3f |
| 1: mov v4.16b, v3.16b |
| - ld1 {v5.2d}, [x10], #16 /* load 2nd round key */ |
| + ld1 {v5.16b}, [x10], #16 /* load 2nd round key */ |
| 2: /* inner loop: 3 rounds, 2x interleaved */ |
| aese v0.16b, v4.16b |
| aesmc v0.16b, v0.16b |
| aese v1.16b, v4.16b |
| aesmc v1.16b, v1.16b |
| -3: ld1 {v3.2d}, [x10], #16 /* load next round key */ |
| +3: ld1 {v3.16b}, [x10], #16 /* load next round key */ |
| aese v0.16b, v5.16b |
| aesmc v0.16b, v0.16b |
| aese v1.16b, v5.16b |
| aesmc v1.16b, v1.16b |
| -4: ld1 {v4.2d}, [x10], #16 /* load next round key */ |
| +4: ld1 {v4.16b}, [x10], #16 /* load next round key */ |
| subs w7, w7, #3 |
| aese v0.16b, v3.16b |
| aesmc v0.16b, v0.16b |
| aese v1.16b, v3.16b |
| aesmc v1.16b, v1.16b |
| - ld1 {v5.2d}, [x10], #16 /* load next round key */ |
| + ld1 {v5.16b}, [x10], #16 /* load next round key */ |
| bpl 2b |
| aese v0.16b, v4.16b |
| aese v1.16b, v4.16b |
| @@ -177,14 +178,14 @@ ENDPROC(ce_aes_ccm_final) |
| eor v0.16b, v0.16b, v2.16b /* xor mac with pt ^ rk[last] */ |
| st1 {v1.16b}, [x0], #16 /* write output block */ |
| bne 0b |
| - rev x8, x8 |
| - st1 {v0.2d}, [x5] /* store mac */ |
| +CPU_LE( rev x8, x8 ) |
| + st1 {v0.16b}, [x5] /* store mac */ |
| str x8, [x6, #8] /* store lsb end of ctr (BE) */ |
| 5: ret |
| |
| 6: eor v0.16b, v0.16b, v5.16b /* final round mac */ |
| eor v1.16b, v1.16b, v5.16b /* final round enc */ |
| - st1 {v0.2d}, [x5] /* store mac */ |
| + st1 {v0.16b}, [x5] /* store mac */ |
| add w2, w2, #16 /* process partial tail block */ |
| 7: ldrb w9, [x1], #1 /* get 1 byte of input */ |
| umov w6, v1.b[0] /* get top crypted ctr byte */ |
| -- |
| 2.10.1 |
| |