diff options
author | Niels Möller <nisse@lysator.liu.se> | 2021-08-10 22:05:40 +0200 |
---|---|---|
committer | Niels Möller <nisse@lysator.liu.se> | 2021-08-10 22:05:40 +0200 |
commit | c7391e5cdb8a0afc05186d484bc9f752b8f0c074 (patch) | |
tree | f107d002f14a51005cd230d00a3c0d34d9ab464b /x86_64 | |
parent | d351a828579f1ffd0a837d68ed3c7f1c7d808f38 (diff) | |
download | nettle-c7391e5cdb8a0afc05186d484bc9f752b8f0c074.tar.gz |
x86_64: Refactor aesni assembly, with specific functions for each key size.
Diffstat (limited to 'x86_64')
-rw-r--r-- | x86_64/aesni/aes-decrypt-internal.asm | 134 | ||||
-rw-r--r-- | x86_64/aesni/aes-encrypt-internal.asm | 134 | ||||
-rw-r--r-- | x86_64/aesni/aes128-decrypt.asm | 136 | ||||
-rw-r--r-- | x86_64/aesni/aes128-encrypt.asm | 136 | ||||
-rw-r--r-- | x86_64/aesni/aes192-decrypt.asm | 146 | ||||
-rw-r--r-- | x86_64/aesni/aes192-encrypt.asm | 146 | ||||
-rw-r--r-- | x86_64/aesni/aes256-decrypt.asm | 113 | ||||
-rw-r--r-- | x86_64/aesni/aes256-encrypt.asm | 113 | ||||
-rw-r--r-- | x86_64/fat/aes128-decrypt-2.asm (renamed from x86_64/fat/aes-encrypt-internal-2.asm) | 11 | ||||
-rw-r--r-- | x86_64/fat/aes128-encrypt-2.asm (renamed from x86_64/fat/aes-encrypt-internal.asm) | 11 | ||||
-rw-r--r-- | x86_64/fat/aes192-decrypt-2.asm (renamed from x86_64/fat/aes-decrypt-internal-2.asm) | 11 | ||||
-rw-r--r-- | x86_64/fat/aes192-encrypt-2.asm (renamed from x86_64/fat/aes-decrypt-internal.asm) | 11 | ||||
-rw-r--r-- | x86_64/fat/aes256-decrypt-2.asm | 36 | ||||
-rw-r--r-- | x86_64/fat/aes256-encrypt-2.asm | 36 |
14 files changed, 886 insertions, 288 deletions
diff --git a/x86_64/aesni/aes-decrypt-internal.asm b/x86_64/aesni/aes-decrypt-internal.asm deleted file mode 100644 index ee960260..00000000 --- a/x86_64/aesni/aes-decrypt-internal.asm +++ /dev/null @@ -1,134 +0,0 @@ -C x86_64/aesni/aes-decrypt-internal.asm - - -ifelse(` - Copyright (C) 2015, 2018 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. 
-') - -C Input argument -define(`ROUNDS', `%rdi') -define(`KEYS', `%rsi') -C define(`TABLE', `%rdx') C Unused here -define(`LENGTH',`%rcx') -define(`DST', `%r8') -define(`SRC', `%r9') - -define(`KEY0', `%xmm0') -define(`KEY1', `%xmm1') -define(`KEY2', `%xmm2') -define(`KEY3', `%xmm3') -define(`KEY4', `%xmm4') -define(`KEY5', `%xmm5') -define(`KEY6', `%xmm6') -define(`KEY7', `%xmm7') -define(`KEY8', `%xmm8') -define(`KEY9', `%xmm9') -define(`KEY10', `%xmm10') -define(`KEY11', `%xmm11') -define(`KEY12', `%xmm12') -define(`KEY13', `%xmm13') -define(`KEYLAST', `%xmm14') -define(`BLOCK', `%xmm15') - - .file "aes-decrypt-internal.asm" - - C _aes_decrypt(unsigned rounds, const uint32_t *keys, - C const struct aes_table *T, - C size_t length, uint8_t *dst, - C uint8_t *src) - .text - ALIGN(16) -PROLOGUE(_nettle_aes_decrypt) - W64_ENTRY(6, 16) - shr $4, LENGTH - test LENGTH, LENGTH - jz .Lend - - movups (KEYS), KEY0 - movups 16(KEYS), KEY1 - movups 32(KEYS), KEY2 - movups 48(KEYS), KEY3 - movups 64(KEYS), KEY4 - movups 80(KEYS), KEY5 - movups 96(KEYS), KEY6 - movups 112(KEYS), KEY7 - movups 128(KEYS), KEY8 - movups 144(KEYS), KEY9 - lea 160(KEYS), KEYS - sub $10, XREG(ROUNDS) C Also clears high half - je .Lkey_last - - movups (KEYS), KEY10 - movups 16(KEYS), KEY11 - lea (KEYS, ROUNDS, 8), KEYS - lea (KEYS, ROUNDS, 8), KEYS - - cmpl $2, XREG(ROUNDS) - je .Lkey_last - movups -32(KEYS), KEY12 - movups -16(KEYS), KEY13 - -.Lkey_last: - movups (KEYS), KEYLAST - -.Lblock_loop: - movups (SRC), BLOCK - pxor KEY0, BLOCK - aesdec KEY1, BLOCK - aesdec KEY2, BLOCK - aesdec KEY3, BLOCK - aesdec KEY4, BLOCK - aesdec KEY5, BLOCK - aesdec KEY6, BLOCK - aesdec KEY7, BLOCK - aesdec KEY8, BLOCK - aesdec KEY9, BLOCK - testl XREG(ROUNDS), XREG(ROUNDS) - je .Lblock_end - aesdec KEY10, BLOCK - aesdec KEY11, BLOCK - cmpl $2, XREG(ROUNDS) - je .Lblock_end - - aesdec KEY12, BLOCK - aesdec KEY13, BLOCK - -.Lblock_end: - aesdeclast KEYLAST, BLOCK - - movups BLOCK, (DST) - add $16, SRC - add $16, DST - 
dec LENGTH - jnz .Lblock_loop - -.Lend: - W64_EXIT(6, 16) - ret -EPILOGUE(_nettle_aes_decrypt) diff --git a/x86_64/aesni/aes-encrypt-internal.asm b/x86_64/aesni/aes-encrypt-internal.asm deleted file mode 100644 index 36ed6df6..00000000 --- a/x86_64/aesni/aes-encrypt-internal.asm +++ /dev/null @@ -1,134 +0,0 @@ -C x86_64/aesni/aes-encrypt-internal.asm - - -ifelse(` - Copyright (C) 2015, 2018 Niels Möller - - This file is part of GNU Nettle. - - GNU Nettle is free software: you can redistribute it and/or - modify it under the terms of either: - - * the GNU Lesser General Public License as published by the Free - Software Foundation; either version 3 of the License, or (at your - option) any later version. - - or - - * the GNU General Public License as published by the Free - Software Foundation; either version 2 of the License, or (at your - option) any later version. - - or both in parallel, as here. - - GNU Nettle is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - General Public License for more details. - - You should have received copies of the GNU General Public License and - the GNU Lesser General Public License along with this program. If - not, see http://www.gnu.org/licenses/. 
-') - -C Input argument -define(`ROUNDS', `%rdi') -define(`KEYS', `%rsi') -C define(`TABLE', `%rdx') C Unused here -define(`LENGTH',`%rcx') -define(`DST', `%r8') -define(`SRC', `%r9') - -define(`KEY0', `%xmm0') -define(`KEY1', `%xmm1') -define(`KEY2', `%xmm2') -define(`KEY3', `%xmm3') -define(`KEY4', `%xmm4') -define(`KEY5', `%xmm5') -define(`KEY6', `%xmm6') -define(`KEY7', `%xmm7') -define(`KEY8', `%xmm8') -define(`KEY9', `%xmm9') -define(`KEY10', `%xmm10') -define(`KEY11', `%xmm11') -define(`KEY12', `%xmm12') -define(`KEY13', `%xmm13') -define(`KEYLAST', `%xmm14') -define(`BLOCK', `%xmm15') - - .file "aes-encrypt-internal.asm" - - C _aes_encrypt(unsigned rounds, const uint32_t *keys, - C const struct aes_table *T, - C size_t length, uint8_t *dst, - C uint8_t *src) - .text - ALIGN(16) -PROLOGUE(_nettle_aes_encrypt) - W64_ENTRY(6, 16) - shr $4, LENGTH - test LENGTH, LENGTH - jz .Lend - - movups (KEYS), KEY0 - movups 16(KEYS), KEY1 - movups 32(KEYS), KEY2 - movups 48(KEYS), KEY3 - movups 64(KEYS), KEY4 - movups 80(KEYS), KEY5 - movups 96(KEYS), KEY6 - movups 112(KEYS), KEY7 - movups 128(KEYS), KEY8 - movups 144(KEYS), KEY9 - lea 160(KEYS), KEYS - sub $10, XREG(ROUNDS) C Also clears high half - je .Lkey_last - - movups (KEYS), KEY10 - movups 16(KEYS), KEY11 - lea (KEYS, ROUNDS, 8), KEYS - lea (KEYS, ROUNDS, 8), KEYS - - cmpl $2, XREG(ROUNDS) - je .Lkey_last - movups -32(KEYS), KEY12 - movups -16(KEYS), KEY13 - -.Lkey_last: - movups (KEYS), KEYLAST - -.Lblock_loop: - movups (SRC), BLOCK - pxor KEY0, BLOCK - aesenc KEY1, BLOCK - aesenc KEY2, BLOCK - aesenc KEY3, BLOCK - aesenc KEY4, BLOCK - aesenc KEY5, BLOCK - aesenc KEY6, BLOCK - aesenc KEY7, BLOCK - aesenc KEY8, BLOCK - aesenc KEY9, BLOCK - testl XREG(ROUNDS), XREG(ROUNDS) - je .Lblock_end - aesenc KEY10, BLOCK - aesenc KEY11, BLOCK - cmpl $2, XREG(ROUNDS) - je .Lblock_end - - aesenc KEY12, BLOCK - aesenc KEY13, BLOCK - -.Lblock_end: - aesenclast KEYLAST, BLOCK - - movups BLOCK, (DST) - add $16, SRC - add $16, DST - 
dec LENGTH - jnz .Lblock_loop - -.Lend: - W64_EXIT(6, 16) - ret -EPILOGUE(_nettle_aes_encrypt) diff --git a/x86_64/aesni/aes128-decrypt.asm b/x86_64/aesni/aes128-decrypt.asm new file mode 100644 index 00000000..79111e47 --- /dev/null +++ b/x86_64/aesni/aes128-decrypt.asm @@ -0,0 +1,136 @@ +C x86_64/aesni/aes128-decrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +C Input argument +define(`CTX', `%rdi') +define(`LENGTH',`%rsi') +define(`DST', `%rdx') +define(`SRC', `%rcx') + +define(`KEY0', `%xmm0') +define(`KEY1', `%xmm1') +define(`KEY2', `%xmm2') +define(`KEY3', `%xmm3') +define(`KEY4', `%xmm4') +define(`KEY5', `%xmm5') +define(`KEY6', `%xmm6') +define(`KEY7', `%xmm7') +define(`KEY8', `%xmm8') +define(`KEY9', `%xmm9') +define(`KEY10', `%xmm10') +define(`X', `%xmm11') +define(`Y', `%xmm12') + + .file "aes128-decrypt.asm" + + C nettle_aes128_decrypt(const struct aes128_ctx *ctx, + C size_t length, uint8_t *dst, + C const uint8_t *src); + + .text + ALIGN(16) +PROLOGUE(nettle_aes128_decrypt) + W64_ENTRY(4, 13) + shr $4, LENGTH + test LENGTH, LENGTH + jz .Lend + + movups (CTX), KEY0 + movups 16(CTX), KEY1 + movups 32(CTX), KEY2 + movups 48(CTX), KEY3 + movups 64(CTX), KEY4 + movups 80(CTX), KEY5 + movups 96(CTX), KEY6 + movups 112(CTX), KEY7 + movups 128(CTX), KEY8 + movups 144(CTX), KEY9 + movups 160(CTX), KEY10 + shr LENGTH + jnc .Lblock_loop + + movups (SRC), X + pxor KEY0, X + aesdec KEY1, X + aesdec KEY2, X + aesdec KEY3, X + aesdec KEY4, X + aesdec KEY5, X + aesdec KEY6, X + aesdec KEY7, X + aesdec KEY8, X + aesdec KEY9, X + aesdeclast KEY10, X + + movups X, (DST) + add $16, SRC + add $16, DST + test LENGTH, LENGTH + jz .Lend + +.Lblock_loop: + movups (SRC), X + movups 16(SRC), Y + pxor KEY0, X + pxor KEY0, Y + aesdec KEY1, X + aesdec KEY1, Y + aesdec KEY2, X + aesdec KEY2, Y + aesdec KEY3, X + aesdec KEY3, Y + aesdec KEY4, X + aesdec KEY4, Y + aesdec KEY5, X + aesdec KEY5, Y + aesdec KEY6, X + aesdec KEY6, Y + aesdec KEY7, X + aesdec KEY7, Y + aesdec KEY8, X + aesdec KEY8, Y + aesdec KEY9, X + aesdec KEY9, Y + aesdeclast KEY10, X + aesdeclast KEY10, Y + + movups X, (DST) + movups Y, 16(DST) + add $32, SRC + add $32, DST + dec LENGTH + jnz .Lblock_loop + +.Lend: + W64_EXIT(4, 13) + ret +EPILOGUE(nettle_aes128_decrypt) diff --git a/x86_64/aesni/aes128-encrypt.asm b/x86_64/aesni/aes128-encrypt.asm new file mode 
100644 index 00000000..8e7ebe78 --- /dev/null +++ b/x86_64/aesni/aes128-encrypt.asm @@ -0,0 +1,136 @@ +C x86_64/aesni/aes128-encrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +C Input argument +define(`CTX', `%rdi') +define(`LENGTH',`%rsi') +define(`DST', `%rdx') +define(`SRC', `%rcx') + +define(`KEY0', `%xmm0') +define(`KEY1', `%xmm1') +define(`KEY2', `%xmm2') +define(`KEY3', `%xmm3') +define(`KEY4', `%xmm4') +define(`KEY5', `%xmm5') +define(`KEY6', `%xmm6') +define(`KEY7', `%xmm7') +define(`KEY8', `%xmm8') +define(`KEY9', `%xmm9') +define(`KEY10', `%xmm10') +define(`X', `%xmm11') +define(`Y', `%xmm12') + + .file "aes128-encrypt.asm" + + C nettle_aes128_encrypt(const struct aes128_ctx *ctx, + C size_t length, uint8_t *dst, + C const uint8_t *src); + + .text + ALIGN(16) +PROLOGUE(nettle_aes128_encrypt) + W64_ENTRY(4, 13) + shr $4, LENGTH + test LENGTH, LENGTH + jz .Lend + + movups (CTX), KEY0 + movups 16(CTX), KEY1 + movups 32(CTX), KEY2 + movups 48(CTX), KEY3 + movups 64(CTX), KEY4 + movups 80(CTX), KEY5 + movups 96(CTX), KEY6 + movups 112(CTX), KEY7 + movups 128(CTX), KEY8 + movups 144(CTX), KEY9 + movups 160(CTX), KEY10 + shr LENGTH + jnc .Lblock_loop + + movups (SRC), X + pxor KEY0, X + aesenc KEY1, X + aesenc KEY2, X + aesenc KEY3, X + aesenc KEY4, X + aesenc KEY5, X + aesenc KEY6, X + aesenc KEY7, X + aesenc KEY8, X + aesenc KEY9, X + aesenclast KEY10, X + + movups X, (DST) + add $16, SRC + add $16, DST + test LENGTH, LENGTH + jz .Lend + +.Lblock_loop: + movups (SRC), X + movups 16(SRC), Y + pxor KEY0, X + pxor KEY0, Y + aesenc KEY1, X + aesenc KEY1, Y + aesenc KEY2, X + aesenc KEY2, Y + aesenc KEY3, X + aesenc KEY3, Y + aesenc KEY4, X + aesenc KEY4, Y + aesenc KEY5, X + aesenc KEY5, Y + aesenc KEY6, X + aesenc KEY6, Y + aesenc KEY7, X + aesenc KEY7, Y + aesenc KEY8, X + aesenc KEY8, Y + aesenc KEY9, X + aesenc KEY9, Y + aesenclast KEY10, X + aesenclast KEY10, Y + + movups X, (DST) + movups Y, 16(DST) + add $32, SRC + add $32, DST + dec LENGTH + jnz .Lblock_loop + +.Lend: + W64_EXIT(4, 13) + ret +EPILOGUE(nettle_aes128_encrypt) diff --git a/x86_64/aesni/aes192-decrypt.asm b/x86_64/aesni/aes192-decrypt.asm new file mode 
100644 index 00000000..399f89b6 --- /dev/null +++ b/x86_64/aesni/aes192-decrypt.asm @@ -0,0 +1,146 @@ +C x86_64/aesni/aes192-decrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +C Input argument +define(`CTX', `%rdi') +define(`LENGTH',`%rsi') +define(`DST', `%rdx') +define(`SRC', `%rcx') + +define(`KEY0', `%xmm0') +define(`KEY1', `%xmm1') +define(`KEY2', `%xmm2') +define(`KEY3', `%xmm3') +define(`KEY4', `%xmm4') +define(`KEY5', `%xmm5') +define(`KEY6', `%xmm6') +define(`KEY7', `%xmm7') +define(`KEY8', `%xmm8') +define(`KEY9', `%xmm9') +define(`KEY10', `%xmm10') +define(`KEY11', `%xmm11') +define(`KEY12', `%xmm12') +define(`X', `%xmm13') +define(`Y', `%xmm14') + + .file "aes192-decrypt.asm" + + C nettle_aes192_decrypt(const struct aes192_ctx *ctx, + C size_t length, uint8_t *dst, + C const uint8_t *src); + + .text + ALIGN(16) +PROLOGUE(nettle_aes192_decrypt) + W64_ENTRY(4, 15) + shr $4, LENGTH + test LENGTH, LENGTH + jz .Lend + + movups (CTX), KEY0 + movups 16(CTX), KEY1 + movups 32(CTX), KEY2 + movups 48(CTX), KEY3 + movups 64(CTX), KEY4 + movups 80(CTX), KEY5 + movups 96(CTX), KEY6 + movups 112(CTX), KEY7 + movups 128(CTX), KEY8 + movups 144(CTX), KEY9 + movups 160(CTX), KEY10 + movups 176(CTX), KEY11 + movups 192(CTX), KEY12 + shr LENGTH + jnc .Lblock_loop + + movups (SRC), X + pxor KEY0, X + aesdec KEY1, X + aesdec KEY2, X + aesdec KEY3, X + aesdec KEY4, X + aesdec KEY5, X + aesdec KEY6, X + aesdec KEY7, X + aesdec KEY8, X + aesdec KEY9, X + aesdec KEY10, X + aesdec KEY11, X + aesdeclast KEY12, X + + movups X, (DST) + add $16, SRC + add $16, DST + test LENGTH, LENGTH + jz .Lend + +.Lblock_loop: + movups (SRC), X + movups 16(SRC), Y + pxor KEY0, X + pxor KEY0, Y + aesdec KEY1, X + aesdec KEY1, Y + aesdec KEY2, X + aesdec KEY2, Y + aesdec KEY3, X + aesdec KEY3, Y + aesdec KEY4, X + aesdec KEY4, Y + aesdec KEY5, X + aesdec KEY5, Y + aesdec KEY6, X + aesdec KEY6, Y + aesdec KEY7, X + aesdec KEY7, Y + aesdec KEY8, X + aesdec KEY8, Y + aesdec KEY9, X + aesdec KEY9, Y + aesdec KEY10, X + aesdec KEY10, Y + aesdec KEY11, X + aesdec KEY11, Y + aesdeclast KEY12, X + aesdeclast KEY12, Y + + movups X, (DST) + movups Y, 16(DST) + add $32, SRC 
+ add $32, DST + dec LENGTH + jnz .Lblock_loop + +.Lend: + W64_EXIT(4, 15) + ret +EPILOGUE(nettle_aes192_decrypt) diff --git a/x86_64/aesni/aes192-encrypt.asm b/x86_64/aesni/aes192-encrypt.asm new file mode 100644 index 00000000..67271b83 --- /dev/null +++ b/x86_64/aesni/aes192-encrypt.asm @@ -0,0 +1,146 @@ +C x86_64/aesni/aes192-encrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +C Input argument +define(`CTX', `%rdi') +define(`LENGTH',`%rsi') +define(`DST', `%rdx') +define(`SRC', `%rcx') + +define(`KEY0', `%xmm0') +define(`KEY1', `%xmm1') +define(`KEY2', `%xmm2') +define(`KEY3', `%xmm3') +define(`KEY4', `%xmm4') +define(`KEY5', `%xmm5') +define(`KEY6', `%xmm6') +define(`KEY7', `%xmm7') +define(`KEY8', `%xmm8') +define(`KEY9', `%xmm9') +define(`KEY10', `%xmm10') +define(`KEY11', `%xmm11') +define(`KEY12', `%xmm12') +define(`X', `%xmm13') +define(`Y', `%xmm14') + + .file "aes192-encrypt.asm" + + C nettle_aes192_encrypt(const struct aes192_ctx *ctx, + C size_t length, uint8_t *dst, + C const uint8_t *src); + + .text + ALIGN(16) +PROLOGUE(nettle_aes192_encrypt) + W64_ENTRY(4, 15) + shr $4, LENGTH + test LENGTH, LENGTH + jz .Lend + + movups (CTX), KEY0 + movups 16(CTX), KEY1 + movups 32(CTX), KEY2 + movups 48(CTX), KEY3 + movups 64(CTX), KEY4 + movups 80(CTX), KEY5 + movups 96(CTX), KEY6 + movups 112(CTX), KEY7 + movups 128(CTX), KEY8 + movups 144(CTX), KEY9 + movups 160(CTX), KEY10 + movups 176(CTX), KEY11 + movups 192(CTX), KEY12 + shr LENGTH + jnc .Lblock_loop + + movups (SRC), X + pxor KEY0, X + aesenc KEY1, X + aesenc KEY2, X + aesenc KEY3, X + aesenc KEY4, X + aesenc KEY5, X + aesenc KEY6, X + aesenc KEY7, X + aesenc KEY8, X + aesenc KEY9, X + aesenc KEY10, X + aesenc KEY11, X + aesenclast KEY12, X + + movups X, (DST) + add $16, SRC + add $16, DST + test LENGTH, LENGTH + jz .Lend + +.Lblock_loop: + movups (SRC), X + movups 16(SRC), Y + pxor KEY0, X + pxor KEY0, Y + aesenc KEY1, X + aesenc KEY1, Y + aesenc KEY2, X + aesenc KEY2, Y + aesenc KEY3, X + aesenc KEY3, Y + aesenc KEY4, X + aesenc KEY4, Y + aesenc KEY5, X + aesenc KEY5, Y + aesenc KEY6, X + aesenc KEY6, Y + aesenc KEY7, X + aesenc KEY7, Y + aesenc KEY8, X + aesenc KEY8, Y + aesenc KEY9, X + aesenc KEY9, Y + aesenc KEY10, X + aesenc KEY10, Y + aesenc KEY11, X + aesenc KEY11, Y + aesenclast KEY12, X + aesenclast KEY12, Y + + movups X, (DST) + movups Y, 16(DST) + add $32, SRC 
+ add $32, DST + dec LENGTH + jnz .Lblock_loop + +.Lend: + W64_EXIT(4, 15) + ret +EPILOGUE(nettle_aes192_encrypt) diff --git a/x86_64/aesni/aes256-decrypt.asm b/x86_64/aesni/aes256-decrypt.asm new file mode 100644 index 00000000..122f1db6 --- /dev/null +++ b/x86_64/aesni/aes256-decrypt.asm @@ -0,0 +1,113 @@ +C x86_64/aesni/aes256-decrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. 
+') + +C Input argument +define(`CTX', `%rdi') +define(`LENGTH',`%rsi') +define(`DST', `%rdx') +define(`SRC', `%rcx') + +define(`KEY0', `%xmm0') +define(`KEY1', `%xmm1') +define(`KEY2', `%xmm2') +define(`KEY3', `%xmm3') +define(`KEY4', `%xmm4') +define(`KEY5', `%xmm5') +define(`KEY6', `%xmm6') +define(`KEY7', `%xmm7') +define(`KEY8', `%xmm8') +define(`KEY9', `%xmm9') +define(`KEY10', `%xmm10') +define(`KEY11', `%xmm11') +define(`KEY12', `%xmm12') +define(`KEY13', `%xmm13') +define(`KEY14', `%xmm14') +define(`X', `%xmm15') + + .file "aes256-decrypt.asm" + + C nettle_aes256_decrypt(const struct aes256_ctx *ctx, + C size_t length, uint8_t *dst, + C const uint8_t *src); + + .text + ALIGN(16) +PROLOGUE(nettle_aes256_decrypt) + W64_ENTRY(4, 16) + shr $4, LENGTH + test LENGTH, LENGTH + jz .Lend + + movups (CTX), KEY0 + movups 16(CTX), KEY1 + movups 32(CTX), KEY2 + movups 48(CTX), KEY3 + movups 64(CTX), KEY4 + movups 80(CTX), KEY5 + movups 96(CTX), KEY6 + movups 112(CTX), KEY7 + movups 128(CTX), KEY8 + movups 144(CTX), KEY9 + movups 160(CTX), KEY10 + movups 176(CTX), KEY11 + movups 192(CTX), KEY12 + movups 208(CTX), KEY13 + movups 224(CTX), KEY14 + +.Lblock_loop: + movups (SRC), X + pxor KEY0, X + aesdec KEY1, X + aesdec KEY2, X + aesdec KEY3, X + aesdec KEY4, X + aesdec KEY5, X + aesdec KEY6, X + aesdec KEY7, X + aesdec KEY8, X + aesdec KEY9, X + aesdec KEY10, X + aesdec KEY11, X + aesdec KEY12, X + aesdec KEY13, X + aesdeclast KEY14, X + + movups X, (DST) + add $16, SRC + add $16, DST + dec LENGTH + jnz .Lblock_loop + +.Lend: + W64_EXIT(4, 16) + ret +EPILOGUE(nettle_aes256_decrypt) diff --git a/x86_64/aesni/aes256-encrypt.asm b/x86_64/aesni/aes256-encrypt.asm new file mode 100644 index 00000000..b261a237 --- /dev/null +++ b/x86_64/aesni/aes256-encrypt.asm @@ -0,0 +1,113 @@ +C x86_64/aesni/aes256-encrypt.asm + +ifelse(` + Copyright (C) 2015, 2018, 2021 Niels Möller + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +C Input argument +define(`CTX', `%rdi') +define(`LENGTH',`%rsi') +define(`DST', `%rdx') +define(`SRC', `%rcx') + +define(`KEY0', `%xmm0') +define(`KEY1', `%xmm1') +define(`KEY2', `%xmm2') +define(`KEY3', `%xmm3') +define(`KEY4', `%xmm4') +define(`KEY5', `%xmm5') +define(`KEY6', `%xmm6') +define(`KEY7', `%xmm7') +define(`KEY8', `%xmm8') +define(`KEY9', `%xmm9') +define(`KEY10', `%xmm10') +define(`KEY11', `%xmm11') +define(`KEY12', `%xmm12') +define(`KEY13', `%xmm13') +define(`KEY14', `%xmm14') +define(`X', `%xmm15') + + .file "aes256-encrypt.asm" + + C nettle_aes256_encrypt(const struct aes256_ctx *ctx, + C size_t length, uint8_t *dst, + C const uint8_t *src); + + .text + ALIGN(16) +PROLOGUE(nettle_aes256_encrypt) + W64_ENTRY(4, 16) + shr $4, LENGTH + test LENGTH, LENGTH + jz .Lend + + movups (CTX), KEY0 + movups 16(CTX), KEY1 + movups 32(CTX), KEY2 + movups 48(CTX), KEY3 + movups 64(CTX), KEY4 + movups 80(CTX), KEY5 + movups 96(CTX), KEY6 + movups 112(CTX), KEY7 + movups 128(CTX), KEY8 + movups 144(CTX), KEY9 + movups 160(CTX), KEY10 + movups 176(CTX), KEY11 + movups 192(CTX), KEY12 
+ movups 208(CTX), KEY13 + movups 224(CTX), KEY14 + +.Lblock_loop: + movups (SRC), X + pxor KEY0, X + aesenc KEY1, X + aesenc KEY2, X + aesenc KEY3, X + aesenc KEY4, X + aesenc KEY5, X + aesenc KEY6, X + aesenc KEY7, X + aesenc KEY8, X + aesenc KEY9, X + aesenc KEY10, X + aesenc KEY11, X + aesenc KEY12, X + aesenc KEY13, X + aesenclast KEY14, X + + movups X, (DST) + add $16, SRC + add $16, DST + dec LENGTH + jnz .Lblock_loop + +.Lend: + W64_EXIT(4, 16) + ret +EPILOGUE(nettle_aes256_encrypt) diff --git a/x86_64/fat/aes-encrypt-internal-2.asm b/x86_64/fat/aes128-decrypt-2.asm index c0a4f3d9..4015ee5a 100644 --- a/x86_64/fat/aes-encrypt-internal-2.asm +++ b/x86_64/fat/aes128-decrypt-2.asm @@ -1,8 +1,7 @@ -C x86_64/fat/aes-encrypt-internal-2.asm - +C x86_64/fat/aes128-decrypt-2.asm ifelse(` - Copyright (C) 2015 Niels Möller + Copyright (C) 2021 Niels Möller This file is part of GNU Nettle. @@ -31,5 +30,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') -define(`fat_transform', `$1_aesni') -include_src(`x86_64/aesni/aes-encrypt-internal.asm') +dnl PROLOGUE(nettle_aes128_decrypt) picked up by configure + +define(`fat_transform', `_$1_aesni') +include_src(`x86_64/aesni/aes128-decrypt.asm') diff --git a/x86_64/fat/aes-encrypt-internal.asm b/x86_64/fat/aes128-encrypt-2.asm index 0f7f0134..b49ddca6 100644 --- a/x86_64/fat/aes-encrypt-internal.asm +++ b/x86_64/fat/aes128-encrypt-2.asm @@ -1,8 +1,7 @@ -C x86_64/fat/aes-encrypt-internal.asm - +C x86_64/fat/aes128-encrypt-2.asm ifelse(` - Copyright (C) 2015 Niels Möller + Copyright (C) 2021 Niels Möller This file is part of GNU Nettle. @@ -31,5 +30,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. 
') -define(`fat_transform', `$1_x86_64') -include_src(`x86_64/aes-encrypt-internal.asm') +dnl PROLOGUE(nettle_aes128_encrypt) picked up by configure + +define(`fat_transform', `_$1_aesni') +include_src(`x86_64/aesni/aes128-encrypt.asm') diff --git a/x86_64/fat/aes-decrypt-internal-2.asm b/x86_64/fat/aes192-decrypt-2.asm index cd6a72e2..713610e1 100644 --- a/x86_64/fat/aes-decrypt-internal-2.asm +++ b/x86_64/fat/aes192-decrypt-2.asm @@ -1,8 +1,7 @@ -C x86_64/fat/aes-decrypt-internal-2.asm - +C x86_64/fat/aes192-decrypt-2.asm ifelse(` - Copyright (C) 2015 Niels Möller + Copyright (C) 2021 Niels Möller This file is part of GNU Nettle. @@ -31,5 +30,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') -define(`fat_transform', `$1_aesni') -include_src(`x86_64/aesni/aes-decrypt-internal.asm') +dnl PROLOGUE(nettle_aes192_decrypt) picked up by configure + +define(`fat_transform', `_$1_aesni') +include_src(`x86_64/aesni/aes192-decrypt.asm') diff --git a/x86_64/fat/aes-decrypt-internal.asm b/x86_64/fat/aes192-encrypt-2.asm index 4b9e8f16..ee2bbf56 100644 --- a/x86_64/fat/aes-decrypt-internal.asm +++ b/x86_64/fat/aes192-encrypt-2.asm @@ -1,8 +1,7 @@ -C x86_64/fat/aes-decrypt-internal.asm - +C x86_64/fat/aes192-encrypt-2.asm ifelse(` - Copyright (C) 2015 Niels Möller + Copyright (C) 2021 Niels Möller This file is part of GNU Nettle. @@ -31,5 +30,7 @@ ifelse(` not, see http://www.gnu.org/licenses/. ') -define(`fat_transform', `$1_x86_64') -include_src(`x86_64/aes-decrypt-internal.asm') +dnl PROLOGUE(nettle_aes192_encrypt) picked up by configure + +define(`fat_transform', `_$1_aesni') +include_src(`x86_64/aesni/aes192-encrypt.asm') diff --git a/x86_64/fat/aes256-decrypt-2.asm b/x86_64/fat/aes256-decrypt-2.asm new file mode 100644 index 00000000..d596b257 --- /dev/null +++ b/x86_64/fat/aes256-decrypt-2.asm @@ -0,0 +1,36 @@ +C x86_64/fat/aes256-decrypt-2.asm + +ifelse(` + Copyright (C) 2021 Niels Möller + + This file is part of GNU Nettle. 
+ + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. + + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_aes256_decrypt) picked up by configure + +define(`fat_transform', `_$1_aesni') +include_src(`x86_64/aesni/aes256-decrypt.asm') diff --git a/x86_64/fat/aes256-encrypt-2.asm b/x86_64/fat/aes256-encrypt-2.asm new file mode 100644 index 00000000..9aa3c13e --- /dev/null +++ b/x86_64/fat/aes256-encrypt-2.asm @@ -0,0 +1,36 @@ +C x86_64/fat/aes256-encrypt-2.asm + +ifelse(` + Copyright (C) 2021 Niels Möller + + This file is part of GNU Nettle. + + GNU Nettle is free software: you can redistribute it and/or + modify it under the terms of either: + + * the GNU Lesser General Public License as published by the Free + Software Foundation; either version 3 of the License, or (at your + option) any later version. + + or + + * the GNU General Public License as published by the Free + Software Foundation; either version 2 of the License, or (at your + option) any later version. + + or both in parallel, as here. 
+ + GNU Nettle is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received copies of the GNU General Public License and + the GNU Lesser General Public License along with this program. If + not, see http://www.gnu.org/licenses/. +') + +dnl PROLOGUE(nettle_aes256_encrypt) picked up by configure + +define(`fat_transform', `_$1_aesni') +include_src(`x86_64/aesni/aes256-encrypt.asm') |