summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Niels Möller <nisse@lysator.liu.se> 2021-09-09 20:36:59 +0200
committer Niels Möller <nisse@lysator.liu.se> 2021-09-09 20:36:59 +0200
commit 121290e0a64be203df5961299db01fc26367a2ba (patch)
tree 3dd7cf73e718479552154afa46872b50332feb73
parent 1f58b09c897cadde4e472bd9cb482883a09bceaa (diff)
download nettle-121290e0a64be203df5961299db01fc26367a2ba.tar.gz
x86_64: Assembly CBC AES aesni functions.
-rw-r--r-- ChangeLog 9
-rw-r--r-- configure.ac 2
-rw-r--r-- x86_64/aesni/cbc-aes128-encrypt.asm 108
-rw-r--r-- x86_64/aesni/cbc-aes192-encrypt.asm 114
-rw-r--r-- x86_64/aesni/cbc-aes256-encrypt.asm 121
5 files changed, 354 insertions, 0 deletions
diff --git a/ChangeLog b/ChangeLog
index e6f95784..96da9573 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,12 @@
+2021-09-09 Niels Möller <nisse@lysator.liu.se>
+
+ Implementation of CBC-AES for x86_64 aesni. Roughly 40%-50%
+ speedup benchmarked on Ryzen 5.
+ * x86_64/aesni/cbc-aes128-encrypt.asm: New file.
+ * x86_64/aesni/cbc-aes192-encrypt.asm: New file.
+ * x86_64/aesni/cbc-aes256-encrypt.asm: New file.
+ * configure.ac (asm_replace_list): Add new asm files.
+
2021-09-08 Niels Möller <nisse@lysator.liu.se>
* cbc-aes128-encrypt.c (nettle_cbc_aes128_encrypt): New file and
diff --git a/configure.ac b/configure.ac
index ebec8759..e7778fa8 100644
--- a/configure.ac
+++ b/configure.ac
@@ -563,6 +563,8 @@ asm_replace_list="aes-encrypt-internal.asm aes-decrypt-internal.asm \
aes192-encrypt.asm aes192-decrypt.asm \
aes256-set-encrypt-key.asm aes256-set-decrypt-key.asm \
aes256-encrypt.asm aes256-decrypt.asm \
+ cbc-aes128-encrypt.asm cbc-aes192-encrypt.asm \
+ cbc-aes256-encrypt.asm \
arcfour-crypt.asm camellia-crypt-internal.asm \
md5-compress.asm memxor.asm memxor3.asm \
poly1305-internal.asm \
diff --git a/x86_64/aesni/cbc-aes128-encrypt.asm b/x86_64/aesni/cbc-aes128-encrypt.asm
new file mode 100644
index 00000000..7375dadd
--- /dev/null
+++ b/x86_64/aesni/cbc-aes128-encrypt.asm
@@ -0,0 +1,108 @@
+C x86_64/aesni/cbc-aes128-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments, in the order of the prototype comment below
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+C Round keys 0 to 10, then the chaining state and the current input block
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`X', `%xmm11')
+define(`BLOCK', `%xmm12')
+
+ .file "cbc-aes128-encrypt.asm"
+
+ C nettle_cbc_aes128_encrypt(struct cbc_aes128_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C CBC-encrypts length/16 whole blocks from src to dst and writes the last ciphertext block back as the iv kept in ctx
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_cbc_aes128_encrypt)
+ W64_ENTRY(4, 13)
+ shr $4, LENGTH C Byte count to count of 16-byte blocks; any remainder is dropped
+ test LENGTH, LENGTH
+ jz .Lend C No whole block: return and leave the stored iv untouched
+
+ movups (CTX), KEY0 C Round keys 0 to 10 sit at 16-byte steps from the start of ctx
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), X C Load IV
+
+.Lblock_loop:
+ movups (SRC), BLOCK C Cleartext block
+ pxor KEY0, X C Round key 0 is xored into the chaining value
+ pxor BLOCK, X C State is now iv or previous ciphertext xor key 0 xor cleartext
+ aesenc KEY1, X C Rounds 1 to 9
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenclast KEY10, X C Final round; state now holds the ciphertext block
+
+ movups X, (DST) C Store ciphertext; it stays in the register as the next chaining value
+ add $16, SRC
+ add $16, DST
+
+ dec LENGTH
+ jnz .Lblock_loop C Repeat while blocks remain
+
+ C Save IV
+ movups X, 176(CTX) C Last ciphertext block replaces the iv stored after the round keys
+
+.Lend:
+ W64_EXIT(4, 13)
+ ret
+EPILOGUE(nettle_cbc_aes128_encrypt)
diff --git a/x86_64/aesni/cbc-aes192-encrypt.asm b/x86_64/aesni/cbc-aes192-encrypt.asm
new file mode 100644
index 00000000..2438d91f
--- /dev/null
+++ b/x86_64/aesni/cbc-aes192-encrypt.asm
@@ -0,0 +1,114 @@
+C x86_64/aesni/cbc-aes192-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments, in the order of the prototype comment below
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+C Round keys 0 to 12, then the chaining state and the current input block
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`X', `%xmm13')
+define(`BLOCK', `%xmm14')
+
+ .file "cbc-aes192-encrypt.asm"
+
+ C nettle_cbc_aes192_encrypt(struct cbc_aes192_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C CBC-encrypts length/16 whole blocks from src to dst and writes the last ciphertext block back as the iv kept in ctx
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_cbc_aes192_encrypt)
+ W64_ENTRY(4, 15)
+ shr $4, LENGTH C Byte count to count of 16-byte blocks; any remainder is dropped
+ test LENGTH, LENGTH
+ jz .Lend C No whole block: return and leave the stored iv untouched
+
+ movups (CTX), KEY0 C Round keys 0 to 12 sit at 16-byte steps from the start of ctx
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ movups 208(CTX), X C Load IV
+
+.Lblock_loop:
+ movups (SRC), BLOCK C Cleartext block
+ pxor KEY0, X C Round key 0 is xored into the chaining value
+ pxor BLOCK, X C State is now iv or previous ciphertext xor key 0 xor cleartext
+ aesenc KEY1, X C Rounds 1 to 11
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenc KEY10, X
+ aesenc KEY11, X
+ aesenclast KEY12, X C Final round; state now holds the ciphertext block
+
+ movups X, (DST) C Store ciphertext; it stays in the register as the next chaining value
+ add $16, SRC
+ add $16, DST
+
+ dec LENGTH
+ jnz .Lblock_loop C Repeat while blocks remain
+
+ C Save IV
+ movups X, 208(CTX) C Last ciphertext block replaces the iv stored after the round keys
+
+.Lend:
+ W64_EXIT(4, 15)
+ ret
+EPILOGUE(nettle_cbc_aes192_encrypt)
diff --git a/x86_64/aesni/cbc-aes256-encrypt.asm b/x86_64/aesni/cbc-aes256-encrypt.asm
new file mode 100644
index 00000000..6b289c70
--- /dev/null
+++ b/x86_64/aesni/cbc-aes256-encrypt.asm
@@ -0,0 +1,121 @@
+C x86_64/aesni/cbc-aes256-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments, in the order of the prototype comment below
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+C Round keys; one register is shared by round keys 0 and 7 because xmm0 to xmm15 are all in use
+define(`KEY0_7', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY8', `%xmm7')
+define(`KEY9', `%xmm8')
+define(`KEY10', `%xmm9')
+define(`KEY11', `%xmm10')
+define(`KEY12', `%xmm11')
+define(`KEY13', `%xmm12')
+define(`KEY14', `%xmm13')
+C Chaining state and the current input block
+define(`X', `%xmm14')
+define(`BLOCK', `%xmm15')
+
+ .file "cbc-aes256-encrypt.asm"
+
+ C nettle_cbc_aes256_encrypt(struct cbc_aes256_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C CBC-encrypts length/16 whole blocks from src to dst and writes the last ciphertext block back as the iv kept in ctx
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_cbc_aes256_encrypt)
+ W64_ENTRY(4, 16)
+ shr $4, LENGTH C Byte count to count of 16-byte blocks; any remainder is dropped
+ test LENGTH, LENGTH
+ jz .Lend C No whole block: return and leave the stored iv untouched
+
+ movups (CTX), KEY0_7 C Shared register starts out holding round key 0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 128(CTX), KEY8 C Offset 112 with round key 7 is skipped here and loaded inside the loop
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ movups 208(CTX), KEY13
+ movups 224(CTX), KEY14
+ movups 240(CTX), X C Load IV
+
+.Lblock_loop:
+ movups (SRC), BLOCK C Cleartext block
+ pxor KEY0_7, X C Shared register holds round key 0 at this point
+ movups 112(CTX), KEY0_7 C Key 0 is consumed; reload the shared register with round key 7
+ pxor BLOCK, X C State is now iv or previous ciphertext xor key 0 xor cleartext
+ aesenc KEY1, X C Rounds 1 to 6
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY0_7, X C Round 7 using the key just loaded from offset 112
+ movups (CTX), KEY0_7 C Restore round key 0 for the next iteration
+ aesenc KEY8, X C Rounds 8 to 13
+ aesenc KEY9, X
+ aesenc KEY10, X
+ aesenc KEY11, X
+ aesenc KEY12, X
+ aesenc KEY13, X
+ aesenclast KEY14, X C Final round; state now holds the ciphertext block
+
+ movups X, (DST) C Store ciphertext; it stays in the register as the next chaining value
+ add $16, SRC
+ add $16, DST
+
+ dec LENGTH
+ jnz .Lblock_loop C Repeat while blocks remain
+
+ C Save IV
+ movups X, 240(CTX) C Last ciphertext block replaces the iv stored after the round keys
+
+.Lend:
+ W64_EXIT(4, 16)
+ ret
+EPILOGUE(nettle_cbc_aes256_encrypt)