diff options
author | Niels Möller <nisse@lysator.liu.se> | 2021-09-09 20:36:59 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2021-09-09 20:36:59 +0200 |
commit | 121290e0a64be203df5961299db01fc26367a2ba (patch) | |
tree | 3dd7cf73e718479552154afa46872b50332feb73 | |
parent | 1f58b09c897cadde4e472bd9cb482883a09bceaa (diff) | |
download | nettle-121290e0a64be203df5961299db01fc26367a2ba.tar.gz |
x86_64: Assembly CBC AES aesni functions.
-rw-r--r-- | ChangeLog | 9 | ||||
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | x86_64/aesni/cbc-aes128-encrypt.asm | 108 | ||||
-rw-r--r-- | x86_64/aesni/cbc-aes192-encrypt.asm | 114 | ||||
-rw-r--r-- | x86_64/aesni/cbc-aes256-encrypt.asm | 121 |
5 files changed, 354 insertions, 0 deletions
@@ -1,3 +1,12 @@ +2021-09-09 Niels Möller <nisse@lysator.liu.se> + + Implementation of CBC-AES for x86_64 aesni. Roughly 40%-50% + speedup benchmarked on Ryzen 5. + * x86_64/aesni/cbc-aes128-encrypt.asm: New file. + * x86_64/aesni/cbc-aes192-encrypt.asm: New file. + * x86_64/aesni/cbc-aes256-encrypt.asm: New file. + * configure.ac (asm_replace_list): Add new asm files. + 2021-09-08 Niels Möller <nisse@lysator.liu.se> * cbc-aes128-encrypt.c (nettle_cbc_aes128_encrypt): New file and diff --git a/configure.ac b/configure.ac index ebec8759..e7778fa8 100644 --- a/configure.ac +++ b/configure.ac @@ -563,6 +563,8 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \ aes192-encrypt.asm aes192-decrypt.asm \ aes256-set-encrypt-key.asm aes256-set-decrypt-key.asm \ aes256-encrypt.asm aes256-decrypt.asm \ + cbc-aes128-encrypt.asm cbc-aes192-encrypt.asm \ + cbc-aes256-encrypt.asm \ arcfour-crypt.asm camellia-crypt-internal.asm \ md5-compress.asm memxor.asm memxor3.asm \ poly1305-internal.asm \ diff --git a/x86_64/aesni/cbc-aes128-encrypt.asm b/x86_64/aesni/cbc-aes128-encrypt.asm new file mode 100644 index 00000000..7375dadd --- /dev/null +++ b/x86_64/aesni/cbc-aes128-encrypt.asm @@ -0,0 +1,108 @@ +C x86_64/aesni/cbc-aes128-encrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program. If
+   not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+C NOTE(review): these are the first four integer argument registers of
+C the System V AMD64 calling convention; W64_ENTRY (defined elsewhere)
+C presumably arranges the same register layout on Windows -- confirm.
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+C KEY0..KEY10 hold the eleven 16-byte round keys for the whole call.
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+C X carries the CBC chaining value: the IV first, then each ciphertext block.
+define(`X', `%xmm11')
+C BLOCK receives the cleartext block read from SRC.
+define(`BLOCK', `%xmm12')
+
+	.file "cbc-aes128-encrypt.asm"
+
+	C nettle_cbc_aes128_encrypt(struct cbc_aes128_ctx *ctx,
+	C			    size_t length, uint8_t *dst,
+	C			    const uint8_t *src);
+	C
+	C Encrypts length / 16 (rounded down) 16-byte blocks from src to
+	C dst in CBC mode with AES-128. The chaining value is read from
+	C offset 176 of ctx and the last ciphertext block is written back
+	C there. If the block count is zero, ctx, src and dst are not
+	C accessed at all. All memory accesses use movups, so no pointer
+	C needs to be 16-byte aligned.
+	C NOTE(review): W64_ENTRY/W64_EXIT are defined elsewhere; they
+	C presumably handle the Windows x64 ABI for 4 arguments and 13
+	C xmm registers -- confirm.
+
+	.text
+	ALIGN(16)
+PROLOGUE(nettle_cbc_aes128_encrypt)
+	W64_ENTRY(4, 13)
+	shr	$4, LENGTH		C Byte count -> number of 16-byte blocks
+	test	LENGTH, LENGTH
+	jz	.Lend			C No complete block, nothing to do
+
+	C Load all round keys once; they stay in registers for every block.
+	movups	(CTX), KEY0
+	movups	16(CTX), KEY1
+	movups	32(CTX), KEY2
+	movups	48(CTX), KEY3
+	movups	64(CTX), KEY4
+	movups	80(CTX), KEY5
+	movups	96(CTX), KEY6
+	movups	112(CTX), KEY7
+	movups	128(CTX), KEY8
+	movups	144(CTX), KEY9
+	movups	160(CTX), KEY10
+	movups	176(CTX), X	C Load IV
+
+.Lblock_loop:
+	movups	(SRC), BLOCK	C Cleartext block
+	pxor	KEY0, X			C Fold the first round key into the chaining value
+	pxor	BLOCK, X		C X = cleartext ^ chaining value ^ KEY0
+	aesenc	KEY1, X			C Nine full rounds with KEY1..KEY9
+	aesenc	KEY2, X
+	aesenc	KEY3, X
+	aesenc	KEY4, X
+	aesenc	KEY5, X
+	aesenc	KEY6, X
+	aesenc	KEY7, X
+	aesenc	KEY8, X
+	aesenc	KEY9, X
+	aesenclast KEY10, X		C Final round; X is now the ciphertext block
+
+	movups	X, (DST)
+	add	$16, SRC
+	add	$16, DST
+
+	dec	LENGTH
+	jnz	.Lblock_loop		C X stays in place as the next chaining value
+
+	C Save IV
+	movups	X, 176(CTX)		C Last ciphertext block becomes the stored IV
+
+.Lend:
+	W64_EXIT(4, 13)
+	ret
+EPILOGUE(nettle_cbc_aes128_encrypt)
diff --git a/x86_64/aesni/cbc-aes192-encrypt.asm b/x86_64/aesni/cbc-aes192-encrypt.asm
new file mode 
100644 index 00000000..2438d91f --- /dev/null +++ b/x86_64/aesni/cbc-aes192-encrypt.asm @@ -0,0 +1,114 @@ +C x86_64/aesni/cbc-aes192-encrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+')
+
+C Input arguments
+C NOTE(review): these are the first four integer argument registers of
+C the System V AMD64 calling convention; W64_ENTRY (defined elsewhere)
+C presumably arranges the same register layout on Windows -- confirm.
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+C KEY0..KEY12 hold the thirteen 16-byte round keys for the whole call.
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+C X carries the CBC chaining value: the IV first, then each ciphertext block.
+define(`X', `%xmm13')
+C BLOCK receives the cleartext block read from SRC.
+define(`BLOCK', `%xmm14')
+
+	.file "cbc-aes192-encrypt.asm"
+
+	C nettle_cbc_aes192_encrypt(struct cbc_aes192_ctx *ctx,
+	C			    size_t length, uint8_t *dst,
+	C			    const uint8_t *src);
+	C
+	C Encrypts length / 16 (rounded down) 16-byte blocks from src to
+	C dst in CBC mode with AES-192. The chaining value is read from
+	C offset 208 of ctx and the last ciphertext block is written back
+	C there. If the block count is zero, ctx, src and dst are not
+	C accessed at all. All memory accesses use movups, so no pointer
+	C needs to be 16-byte aligned.
+	C NOTE(review): W64_ENTRY/W64_EXIT are defined elsewhere; they
+	C presumably handle the Windows x64 ABI for 4 arguments and 15
+	C xmm registers -- confirm.
+
+	.text
+	ALIGN(16)
+PROLOGUE(nettle_cbc_aes192_encrypt)
+	W64_ENTRY(4, 15)
+	shr	$4, LENGTH		C Byte count -> number of 16-byte blocks
+	test	LENGTH, LENGTH
+	jz	.Lend			C No complete block, nothing to do
+
+	C Load all round keys once; they stay in registers for every block.
+	movups	(CTX), KEY0
+	movups	16(CTX), KEY1
+	movups	32(CTX), KEY2
+	movups	48(CTX), KEY3
+	movups	64(CTX), KEY4
+	movups	80(CTX), KEY5
+	movups	96(CTX), KEY6
+	movups	112(CTX), KEY7
+	movups	128(CTX), KEY8
+	movups	144(CTX), KEY9
+	movups	160(CTX), KEY10
+	movups	176(CTX), KEY11
+	movups	192(CTX), KEY12
+	movups	208(CTX), X	C Load IV
+
+.Lblock_loop:
+	movups	(SRC), BLOCK	C Cleartext block
+	pxor	KEY0, X			C Fold the first round key into the chaining value
+	pxor	BLOCK, X		C X = cleartext ^ chaining value ^ KEY0
+	aesenc	KEY1, X			C Eleven full rounds with KEY1..KEY11
+	aesenc	KEY2, X
+	aesenc	KEY3, X
+	aesenc	KEY4, X
+	aesenc	KEY5, X
+	aesenc	KEY6, X
+	aesenc	KEY7, X
+	aesenc	KEY8, X
+	aesenc	KEY9, X
+	aesenc	KEY10, X
+	aesenc	KEY11, X
+	aesenclast KEY12, X		C Final round; X is now the ciphertext block
+
+	movups	X, (DST)
+	add	$16, SRC
+	add	$16, DST
+
+	dec	LENGTH
+	jnz	.Lblock_loop		C X stays in place as the next chaining value
+
+	C Save IV
+	movups	X, 208(CTX)		C Last ciphertext block becomes the stored IV
+
+.Lend:
+	W64_EXIT(4, 15)
+	ret
+EPILOGUE(nettle_cbc_aes192_encrypt)
diff --git a/x86_64/aesni/cbc-aes256-encrypt.asm b/x86_64/aesni/cbc-aes256-encrypt.asm
new file mode 100644
index 00000000..6b289c70
--- /dev/null
+++ b/x86_64/aesni/cbc-aes256-encrypt.asm
@@ -0,0 +1,121 @@
+C x86_64/aesni/cbc-aes256-encrypt.asm
+
+ifelse(`
+   Copyright (C) 2015, 2018, 2021 Niels Möller
+
+   This file is part of GNU Nettle.
+
+   GNU Nettle is free software: you can redistribute it and/or
+   modify it under the terms of either:
+
+     * the GNU Lesser General Public License as published by the Free
+       Software Foundation; either version 3 of the License, or (at your
+       option) any later version.
+
+   or
+
+     * the GNU General Public License as published by the Free
+       Software Foundation; either version 2 of the License, or (at your
+       option) any later version.
+
+   or both in parallel, as here.
+
+   GNU Nettle is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+   General Public License for more details.
+
+   You should have received copies of the GNU General Public License and
+   the GNU Lesser General Public License along with this program. If
+   not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+C NOTE(review): these are the first four integer argument registers of
+C the System V AMD64 calling convention; W64_ENTRY (defined elsewhere)
+C presumably arranges the same register layout on Windows -- confirm.
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+C Fifteen round keys plus X and BLOCK are seventeen values, one more
+C than the sixteen xmm registers. Round keys 0 and 7 therefore share
+C xmm0 (KEY0_7) and are reloaded from CTX inside the block loop; the
+C other thirteen round keys stay resident for the whole call.
+define(`KEY0_7', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY8', `%xmm7')
+define(`KEY9', `%xmm8')
+define(`KEY10', `%xmm9')
+define(`KEY11', `%xmm10')
+define(`KEY12', `%xmm11')
+define(`KEY13', `%xmm12')
+define(`KEY14', `%xmm13')
+
+C X carries the CBC chaining value: the IV first, then each ciphertext block.
+define(`X', `%xmm14')
+C BLOCK receives the cleartext block read from SRC.
+define(`BLOCK', `%xmm15')
+
+	.file "cbc-aes256-encrypt.asm"
+
+	C nettle_cbc_aes256_encrypt(struct cbc_aes256_ctx *ctx,
+	C			    size_t length, uint8_t *dst,
+	C			    const uint8_t *src);
+	C
+	C Encrypts length / 16 (rounded down) 16-byte blocks from src to
+	C dst in CBC mode with AES-256. The chaining value is read from
+	C offset 240 of ctx and the last ciphertext block is written back
+	C there. If the block count is zero, ctx, src and dst are not
+	C accessed at all. All memory accesses use movups, so no pointer
+	C needs to be 16-byte aligned.
+	C NOTE(review): W64_ENTRY/W64_EXIT are defined elsewhere; they
+	C presumably handle the Windows x64 ABI for 4 arguments and 16
+	C xmm registers -- confirm.
+
+	.text
+	ALIGN(16)
+PROLOGUE(nettle_cbc_aes256_encrypt)
+	W64_ENTRY(4, 16)
+	shr	$4, LENGTH		C Byte count -> number of 16-byte blocks
+	test	LENGTH, LENGTH
+	jz	.Lend			C No complete block, nothing to do
+
+	C Load the round keys. Offset 112 (round key 7) is skipped here
+	C because its register is occupied by round key 0 at loop entry.
+	movups	(CTX), KEY0_7
+	movups	16(CTX), KEY1
+	movups	32(CTX), KEY2
+	movups	48(CTX), KEY3
+	movups	64(CTX), KEY4
+	movups	80(CTX), KEY5
+	movups	96(CTX), KEY6
+	movups	128(CTX), KEY8
+	movups	144(CTX), KEY9
+	movups	160(CTX), KEY10
+	movups	176(CTX), KEY11
+	movups	192(CTX), KEY12
+	movups	208(CTX), KEY13
+	movups	224(CTX), KEY14
+	movups	240(CTX), X	C Load IV
+
+.Lblock_loop:
+	movups	(SRC), BLOCK	C Cleartext block
+	pxor	KEY0_7, X		C Register holds round key 0 at this point
+	movups	112(CTX), KEY0_7	C Swap in round key 7
+	pxor	BLOCK, X		C X = cleartext ^ chaining value ^ round key 0
+	aesenc	KEY1, X
+	aesenc	KEY2, X
+	aesenc	KEY3, X
+	aesenc	KEY4, X
+	aesenc	KEY5, X
+	aesenc	KEY6, X
+	aesenc	KEY0_7, X		C Round 7, using the key just loaded
+	movups	(CTX), KEY0_7		C Put round key 0 back for the next block
+	aesenc	KEY8, X
+	aesenc	KEY9, X
+	aesenc	KEY10, X
+	aesenc	KEY11, X
+	aesenc	KEY12, X
+	aesenc	KEY13, X
+	aesenclast KEY14, X		C Final round; X is now the ciphertext block
+
+	movups	X, (DST)
+	add	$16, SRC
+	add	$16, DST
+
+	dec	LENGTH
+	jnz	.Lblock_loop		C X stays in place as the next chaining value
+
+	C Save IV
+	movups	X, 240(CTX)		C Last ciphertext block becomes the stored IV
+
+.Lend:
+	W64_EXIT(4, 16)
+	ret
+EPILOGUE(nettle_cbc_aes256_encrypt)
|