summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- ChangeLog 13
-rw-r--r-- fat-x86_64.c 80
-rw-r--r-- x86_64/aesni/aes-decrypt-internal.asm 134
-rw-r--r-- x86_64/aesni/aes-encrypt-internal.asm 134
-rw-r--r-- x86_64/aesni/aes128-decrypt.asm 136
-rw-r--r-- x86_64/aesni/aes128-encrypt.asm 136
-rw-r--r-- x86_64/aesni/aes192-decrypt.asm 146
-rw-r--r-- x86_64/aesni/aes192-encrypt.asm 146
-rw-r--r-- x86_64/aesni/aes256-decrypt.asm 113
-rw-r--r-- x86_64/aesni/aes256-encrypt.asm 113
-rw-r--r-- x86_64/fat/aes128-decrypt-2.asm (renamed from x86_64/fat/aes-encrypt-internal-2.asm) 11
-rw-r--r-- x86_64/fat/aes128-encrypt-2.asm (renamed from x86_64/fat/aes-encrypt-internal.asm) 11
-rw-r--r-- x86_64/fat/aes192-decrypt-2.asm (renamed from x86_64/fat/aes-decrypt-internal-2.asm) 11
-rw-r--r-- x86_64/fat/aes192-encrypt-2.asm (renamed from x86_64/fat/aes-decrypt-internal.asm) 11
-rw-r--r-- x86_64/fat/aes256-decrypt-2.asm 36
-rw-r--r-- x86_64/fat/aes256-encrypt-2.asm 36
16 files changed, 955 insertions, 312 deletions
diff --git a/ChangeLog b/ChangeLog
index 1c4f6c32..14609c8a 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2021-08-10 Niels Möller <nisse@lysator.liu.se>
+
+ * x86_64/aesni/aes128-encrypt.asm: New file, with 2-way loop.
+ * x86_64/aesni/aes128-decrypt.asm: Likewise.
+ * x86_64/aesni/aes192-encrypt.asm: Likewise.
+ * x86_64/aesni/aes192-decrypt.asm: Likewise.
+ * x86_64/aesni/aes256-encrypt.asm: New file, but 1-way loop.
+ * x86_64/aesni/aes256-decrypt.asm: Likewise.
+ * x86_64/aesni/aes-encrypt-internal.asm: Deleted.
+ * x86_64/aesni/aes-decrypt-internal.asm: Deleted.
+ * x86_64/fat/: Corresponding new and deleted files.
+ * fat-x86_64.c: Update fat setup accordingly.
+
2021-08-06 Niels Möller <nisse@lysator.liu.se>
S390x xor functions, from Mamone Tarsha:
diff --git a/fat-x86_64.c b/fat-x86_64.c
index a95a592c..80731eef 100644
--- a/fat-x86_64.c
+++ b/fat-x86_64.c
@@ -111,13 +111,24 @@ get_x86_features (struct x86_features *features)
}
}
-DECLARE_FAT_FUNC(_nettle_aes_encrypt, aes_crypt_internal_func)
-DECLARE_FAT_FUNC_VAR(aes_encrypt, aes_crypt_internal_func, x86_64)
-DECLARE_FAT_FUNC_VAR(aes_encrypt, aes_crypt_internal_func, aesni)
-
-DECLARE_FAT_FUNC(_nettle_aes_decrypt, aes_crypt_internal_func)
-DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, x86_64)
-DECLARE_FAT_FUNC_VAR(aes_decrypt, aes_crypt_internal_func, aesni)
+DECLARE_FAT_FUNC(nettle_aes128_encrypt, aes128_crypt_func)
+DECLARE_FAT_FUNC(nettle_aes128_decrypt, aes128_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes128_encrypt, aes128_crypt_func, aesni)
+DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes128_decrypt, aes128_crypt_func, aesni)
+DECLARE_FAT_FUNC(nettle_aes192_encrypt, aes192_crypt_func)
+DECLARE_FAT_FUNC(nettle_aes192_decrypt, aes192_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes192_encrypt, aes192_crypt_func, aesni)
+DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes192_decrypt, aes192_crypt_func, aesni)
+DECLARE_FAT_FUNC(nettle_aes256_encrypt, aes256_crypt_func)
+DECLARE_FAT_FUNC(nettle_aes256_decrypt, aes256_crypt_func)
+DECLARE_FAT_FUNC_VAR(aes256_encrypt, aes256_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes256_encrypt, aes256_crypt_func, aesni)
+DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, c)
+DECLARE_FAT_FUNC_VAR(aes256_decrypt, aes256_crypt_func, aesni)
DECLARE_FAT_FUNC(nettle_memxor, memxor_func)
DECLARE_FAT_FUNC_VAR(memxor, memxor_func, x86_64)
@@ -160,15 +171,23 @@ fat_init (void)
{
if (verbose)
fprintf (stderr, "libnettle: using aes instructions.\n");
- _nettle_aes_encrypt_vec = _nettle_aes_encrypt_aesni;
- _nettle_aes_decrypt_vec = _nettle_aes_decrypt_aesni;
+ nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_aesni;
+ nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_aesni;
+ nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_aesni;
+ nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_aesni;
+ nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_aesni;
+ nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_aesni;
}
else
{
if (verbose)
fprintf (stderr, "libnettle: not using aes instructions.\n");
- _nettle_aes_encrypt_vec = _nettle_aes_encrypt_x86_64;
- _nettle_aes_decrypt_vec = _nettle_aes_decrypt_x86_64;
+ nettle_aes128_encrypt_vec = _nettle_aes128_encrypt_c;
+ nettle_aes128_decrypt_vec = _nettle_aes128_decrypt_c;
+ nettle_aes192_encrypt_vec = _nettle_aes192_encrypt_c;
+ nettle_aes192_decrypt_vec = _nettle_aes192_decrypt_c;
+ nettle_aes256_encrypt_vec = _nettle_aes256_encrypt_c;
+ nettle_aes256_decrypt_vec = _nettle_aes256_decrypt_c;
}
if (features.have_sha_ni)
@@ -199,19 +218,32 @@ fat_init (void)
}
}
-DEFINE_FAT_FUNC(_nettle_aes_encrypt, void,
- (unsigned rounds, const uint32_t *keys,
- const struct aes_table *T,
- size_t length, uint8_t *dst,
- const uint8_t *src),
- (rounds, keys, T, length, dst, src))
-
-DEFINE_FAT_FUNC(_nettle_aes_decrypt, void,
- (unsigned rounds, const uint32_t *keys,
- const struct aes_table *T,
- size_t length, uint8_t *dst,
- const uint8_t *src),
- (rounds, keys, T, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes128_encrypt, void,
+ (const struct aes128_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes128_decrypt, void,
+ (const struct aes128_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(nettle_aes192_encrypt, void,
+ (const struct aes192_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes192_decrypt, void,
+ (const struct aes192_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+
+DEFINE_FAT_FUNC(nettle_aes256_encrypt, void,
+ (const struct aes256_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
+DEFINE_FAT_FUNC(nettle_aes256_decrypt, void,
+ (const struct aes256_ctx *ctx, size_t length,
+ uint8_t *dst,const uint8_t *src),
+ (ctx, length, dst, src))
DEFINE_FAT_FUNC(nettle_memxor, void *,
(void *dst, const void *src, size_t n),
diff --git a/x86_64/aesni/aes-decrypt-internal.asm b/x86_64/aesni/aes-decrypt-internal.asm
deleted file mode 100644
index ee960260..00000000
--- a/x86_64/aesni/aes-decrypt-internal.asm
+++ /dev/null
@@ -1,134 +0,0 @@
-C x86_64/aesni/aes-decrypt-internal.asm
-
-
-ifelse(`
- Copyright (C) 2015, 2018 Niels Möller
-
- This file is part of GNU Nettle.
-
- GNU Nettle is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
-
- GNU Nettle is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see http://www.gnu.org/licenses/.
-')
-
-C Input argument
-define(`ROUNDS', `%rdi')
-define(`KEYS', `%rsi')
-C define(`TABLE', `%rdx') C Unused here
-define(`LENGTH',`%rcx')
-define(`DST', `%r8')
-define(`SRC', `%r9')
-
-define(`KEY0', `%xmm0')
-define(`KEY1', `%xmm1')
-define(`KEY2', `%xmm2')
-define(`KEY3', `%xmm3')
-define(`KEY4', `%xmm4')
-define(`KEY5', `%xmm5')
-define(`KEY6', `%xmm6')
-define(`KEY7', `%xmm7')
-define(`KEY8', `%xmm8')
-define(`KEY9', `%xmm9')
-define(`KEY10', `%xmm10')
-define(`KEY11', `%xmm11')
-define(`KEY12', `%xmm12')
-define(`KEY13', `%xmm13')
-define(`KEYLAST', `%xmm14')
-define(`BLOCK', `%xmm15')
-
- .file "aes-decrypt-internal.asm"
-
- C _aes_decrypt(unsigned rounds, const uint32_t *keys,
- C const struct aes_table *T,
- C size_t length, uint8_t *dst,
- C uint8_t *src)
- .text
- ALIGN(16)
-PROLOGUE(_nettle_aes_decrypt)
- W64_ENTRY(6, 16)
- shr $4, LENGTH
- test LENGTH, LENGTH
- jz .Lend
-
- movups (KEYS), KEY0
- movups 16(KEYS), KEY1
- movups 32(KEYS), KEY2
- movups 48(KEYS), KEY3
- movups 64(KEYS), KEY4
- movups 80(KEYS), KEY5
- movups 96(KEYS), KEY6
- movups 112(KEYS), KEY7
- movups 128(KEYS), KEY8
- movups 144(KEYS), KEY9
- lea 160(KEYS), KEYS
- sub $10, XREG(ROUNDS) C Also clears high half
- je .Lkey_last
-
- movups (KEYS), KEY10
- movups 16(KEYS), KEY11
- lea (KEYS, ROUNDS, 8), KEYS
- lea (KEYS, ROUNDS, 8), KEYS
-
- cmpl $2, XREG(ROUNDS)
- je .Lkey_last
- movups -32(KEYS), KEY12
- movups -16(KEYS), KEY13
-
-.Lkey_last:
- movups (KEYS), KEYLAST
-
-.Lblock_loop:
- movups (SRC), BLOCK
- pxor KEY0, BLOCK
- aesdec KEY1, BLOCK
- aesdec KEY2, BLOCK
- aesdec KEY3, BLOCK
- aesdec KEY4, BLOCK
- aesdec KEY5, BLOCK
- aesdec KEY6, BLOCK
- aesdec KEY7, BLOCK
- aesdec KEY8, BLOCK
- aesdec KEY9, BLOCK
- testl XREG(ROUNDS), XREG(ROUNDS)
- je .Lblock_end
- aesdec KEY10, BLOCK
- aesdec KEY11, BLOCK
- cmpl $2, XREG(ROUNDS)
- je .Lblock_end
-
- aesdec KEY12, BLOCK
- aesdec KEY13, BLOCK
-
-.Lblock_end:
- aesdeclast KEYLAST, BLOCK
-
- movups BLOCK, (DST)
- add $16, SRC
- add $16, DST
- dec LENGTH
- jnz .Lblock_loop
-
-.Lend:
- W64_EXIT(6, 16)
- ret
-EPILOGUE(_nettle_aes_decrypt)
diff --git a/x86_64/aesni/aes-encrypt-internal.asm b/x86_64/aesni/aes-encrypt-internal.asm
deleted file mode 100644
index 36ed6df6..00000000
--- a/x86_64/aesni/aes-encrypt-internal.asm
+++ /dev/null
@@ -1,134 +0,0 @@
-C x86_64/aesni/aes-encrypt-internal.asm
-
-
-ifelse(`
- Copyright (C) 2015, 2018 Niels Möller
-
- This file is part of GNU Nettle.
-
- GNU Nettle is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
-
- GNU Nettle is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see http://www.gnu.org/licenses/.
-')
-
-C Input argument
-define(`ROUNDS', `%rdi')
-define(`KEYS', `%rsi')
-C define(`TABLE', `%rdx') C Unused here
-define(`LENGTH',`%rcx')
-define(`DST', `%r8')
-define(`SRC', `%r9')
-
-define(`KEY0', `%xmm0')
-define(`KEY1', `%xmm1')
-define(`KEY2', `%xmm2')
-define(`KEY3', `%xmm3')
-define(`KEY4', `%xmm4')
-define(`KEY5', `%xmm5')
-define(`KEY6', `%xmm6')
-define(`KEY7', `%xmm7')
-define(`KEY8', `%xmm8')
-define(`KEY9', `%xmm9')
-define(`KEY10', `%xmm10')
-define(`KEY11', `%xmm11')
-define(`KEY12', `%xmm12')
-define(`KEY13', `%xmm13')
-define(`KEYLAST', `%xmm14')
-define(`BLOCK', `%xmm15')
-
- .file "aes-encrypt-internal.asm"
-
- C _aes_encrypt(unsigned rounds, const uint32_t *keys,
- C const struct aes_table *T,
- C size_t length, uint8_t *dst,
- C uint8_t *src)
- .text
- ALIGN(16)
-PROLOGUE(_nettle_aes_encrypt)
- W64_ENTRY(6, 16)
- shr $4, LENGTH
- test LENGTH, LENGTH
- jz .Lend
-
- movups (KEYS), KEY0
- movups 16(KEYS), KEY1
- movups 32(KEYS), KEY2
- movups 48(KEYS), KEY3
- movups 64(KEYS), KEY4
- movups 80(KEYS), KEY5
- movups 96(KEYS), KEY6
- movups 112(KEYS), KEY7
- movups 128(KEYS), KEY8
- movups 144(KEYS), KEY9
- lea 160(KEYS), KEYS
- sub $10, XREG(ROUNDS) C Also clears high half
- je .Lkey_last
-
- movups (KEYS), KEY10
- movups 16(KEYS), KEY11
- lea (KEYS, ROUNDS, 8), KEYS
- lea (KEYS, ROUNDS, 8), KEYS
-
- cmpl $2, XREG(ROUNDS)
- je .Lkey_last
- movups -32(KEYS), KEY12
- movups -16(KEYS), KEY13
-
-.Lkey_last:
- movups (KEYS), KEYLAST
-
-.Lblock_loop:
- movups (SRC), BLOCK
- pxor KEY0, BLOCK
- aesenc KEY1, BLOCK
- aesenc KEY2, BLOCK
- aesenc KEY3, BLOCK
- aesenc KEY4, BLOCK
- aesenc KEY5, BLOCK
- aesenc KEY6, BLOCK
- aesenc KEY7, BLOCK
- aesenc KEY8, BLOCK
- aesenc KEY9, BLOCK
- testl XREG(ROUNDS), XREG(ROUNDS)
- je .Lblock_end
- aesenc KEY10, BLOCK
- aesenc KEY11, BLOCK
- cmpl $2, XREG(ROUNDS)
- je .Lblock_end
-
- aesenc KEY12, BLOCK
- aesenc KEY13, BLOCK
-
-.Lblock_end:
- aesenclast KEYLAST, BLOCK
-
- movups BLOCK, (DST)
- add $16, SRC
- add $16, DST
- dec LENGTH
- jnz .Lblock_loop
-
-.Lend:
- W64_EXIT(6, 16)
- ret
-EPILOGUE(_nettle_aes_encrypt)
diff --git a/x86_64/aesni/aes128-decrypt.asm b/x86_64/aesni/aes128-decrypt.asm
new file mode 100644
index 00000000..79111e47
--- /dev/null
+++ b/x86_64/aesni/aes128-decrypt.asm
@@ -0,0 +1,136 @@
+C x86_64/aesni/aes128-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`X', `%xmm11')
+define(`Y', `%xmm12')
+
+ .file "aes128-decrypt.asm"
+
+ C nettle_aes128_decrypt(const struct aes128_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes128_decrypt)
+ W64_ENTRY(4, 13)
+ shr $4, LENGTH
+ test LENGTH, LENGTH
+ jz .Lend
+
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ shr LENGTH
+ jnc .Lblock_loop
+
+ movups (SRC), X
+ pxor KEY0, X
+ aesdec KEY1, X
+ aesdec KEY2, X
+ aesdec KEY3, X
+ aesdec KEY4, X
+ aesdec KEY5, X
+ aesdec KEY6, X
+ aesdec KEY7, X
+ aesdec KEY8, X
+ aesdec KEY9, X
+ aesdeclast KEY10, X
+
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH
+ jz .Lend
+
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X
+ pxor KEY0, Y
+ aesdec KEY1, X
+ aesdec KEY1, Y
+ aesdec KEY2, X
+ aesdec KEY2, Y
+ aesdec KEY3, X
+ aesdec KEY3, Y
+ aesdec KEY4, X
+ aesdec KEY4, Y
+ aesdec KEY5, X
+ aesdec KEY5, Y
+ aesdec KEY6, X
+ aesdec KEY6, Y
+ aesdec KEY7, X
+ aesdec KEY7, Y
+ aesdec KEY8, X
+ aesdec KEY8, Y
+ aesdec KEY9, X
+ aesdec KEY9, Y
+ aesdeclast KEY10, X
+ aesdeclast KEY10, Y
+
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH
+ jnz .Lblock_loop
+
+.Lend:
+ W64_EXIT(4, 13)
+ ret
+EPILOGUE(nettle_aes128_decrypt)
diff --git a/x86_64/aesni/aes128-encrypt.asm b/x86_64/aesni/aes128-encrypt.asm
new file mode 100644
index 00000000..8e7ebe78
--- /dev/null
+++ b/x86_64/aesni/aes128-encrypt.asm
@@ -0,0 +1,136 @@
+C x86_64/aesni/aes128-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`X', `%xmm11')
+define(`Y', `%xmm12')
+
+ .file "aes128-encrypt.asm"
+
+ C nettle_aes128_encrypt(const struct aes128_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes128_encrypt)
+ W64_ENTRY(4, 13)
+ shr $4, LENGTH
+ test LENGTH, LENGTH
+ jz .Lend
+
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ shr LENGTH
+ jnc .Lblock_loop
+
+ movups (SRC), X
+ pxor KEY0, X
+ aesenc KEY1, X
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenclast KEY10, X
+
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH
+ jz .Lend
+
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X
+ pxor KEY0, Y
+ aesenc KEY1, X
+ aesenc KEY1, Y
+ aesenc KEY2, X
+ aesenc KEY2, Y
+ aesenc KEY3, X
+ aesenc KEY3, Y
+ aesenc KEY4, X
+ aesenc KEY4, Y
+ aesenc KEY5, X
+ aesenc KEY5, Y
+ aesenc KEY6, X
+ aesenc KEY6, Y
+ aesenc KEY7, X
+ aesenc KEY7, Y
+ aesenc KEY8, X
+ aesenc KEY8, Y
+ aesenc KEY9, X
+ aesenc KEY9, Y
+ aesenclast KEY10, X
+ aesenclast KEY10, Y
+
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH
+ jnz .Lblock_loop
+
+.Lend:
+ W64_EXIT(4, 13)
+ ret
+EPILOGUE(nettle_aes128_encrypt)
diff --git a/x86_64/aesni/aes192-decrypt.asm b/x86_64/aesni/aes192-decrypt.asm
new file mode 100644
index 00000000..399f89b6
--- /dev/null
+++ b/x86_64/aesni/aes192-decrypt.asm
@@ -0,0 +1,146 @@
+C x86_64/aesni/aes192-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`X', `%xmm13')
+define(`Y', `%xmm14')
+
+ .file "aes192-decrypt.asm"
+
+ C nettle_aes192_decrypt(const struct aes192_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes192_decrypt)
+ W64_ENTRY(4, 15)
+ shr $4, LENGTH
+ test LENGTH, LENGTH
+ jz .Lend
+
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ shr LENGTH
+ jnc .Lblock_loop
+
+ movups (SRC), X
+ pxor KEY0, X
+ aesdec KEY1, X
+ aesdec KEY2, X
+ aesdec KEY3, X
+ aesdec KEY4, X
+ aesdec KEY5, X
+ aesdec KEY6, X
+ aesdec KEY7, X
+ aesdec KEY8, X
+ aesdec KEY9, X
+ aesdec KEY10, X
+ aesdec KEY11, X
+ aesdeclast KEY12, X
+
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH
+ jz .Lend
+
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X
+ pxor KEY0, Y
+ aesdec KEY1, X
+ aesdec KEY1, Y
+ aesdec KEY2, X
+ aesdec KEY2, Y
+ aesdec KEY3, X
+ aesdec KEY3, Y
+ aesdec KEY4, X
+ aesdec KEY4, Y
+ aesdec KEY5, X
+ aesdec KEY5, Y
+ aesdec KEY6, X
+ aesdec KEY6, Y
+ aesdec KEY7, X
+ aesdec KEY7, Y
+ aesdec KEY8, X
+ aesdec KEY8, Y
+ aesdec KEY9, X
+ aesdec KEY9, Y
+ aesdec KEY10, X
+ aesdec KEY10, Y
+ aesdec KEY11, X
+ aesdec KEY11, Y
+ aesdeclast KEY12, X
+ aesdeclast KEY12, Y
+
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH
+ jnz .Lblock_loop
+
+.Lend:
+ W64_EXIT(4, 15)
+ ret
+EPILOGUE(nettle_aes192_decrypt)
diff --git a/x86_64/aesni/aes192-encrypt.asm b/x86_64/aesni/aes192-encrypt.asm
new file mode 100644
index 00000000..67271b83
--- /dev/null
+++ b/x86_64/aesni/aes192-encrypt.asm
@@ -0,0 +1,146 @@
+C x86_64/aesni/aes192-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`X', `%xmm13')
+define(`Y', `%xmm14')
+
+ .file "aes192-encrypt.asm"
+
+ C nettle_aes192_encrypt(const struct aes192_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes192_encrypt)
+ W64_ENTRY(4, 15)
+ shr $4, LENGTH
+ test LENGTH, LENGTH
+ jz .Lend
+
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ shr LENGTH
+ jnc .Lblock_loop
+
+ movups (SRC), X
+ pxor KEY0, X
+ aesenc KEY1, X
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenc KEY10, X
+ aesenc KEY11, X
+ aesenclast KEY12, X
+
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH
+ jz .Lend
+
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X
+ pxor KEY0, Y
+ aesenc KEY1, X
+ aesenc KEY1, Y
+ aesenc KEY2, X
+ aesenc KEY2, Y
+ aesenc KEY3, X
+ aesenc KEY3, Y
+ aesenc KEY4, X
+ aesenc KEY4, Y
+ aesenc KEY5, X
+ aesenc KEY5, Y
+ aesenc KEY6, X
+ aesenc KEY6, Y
+ aesenc KEY7, X
+ aesenc KEY7, Y
+ aesenc KEY8, X
+ aesenc KEY8, Y
+ aesenc KEY9, X
+ aesenc KEY9, Y
+ aesenc KEY10, X
+ aesenc KEY10, Y
+ aesenc KEY11, X
+ aesenc KEY11, Y
+ aesenclast KEY12, X
+ aesenclast KEY12, Y
+
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH
+ jnz .Lblock_loop
+
+.Lend:
+ W64_EXIT(4, 15)
+ ret
+EPILOGUE(nettle_aes192_encrypt)
diff --git a/x86_64/aesni/aes256-decrypt.asm b/x86_64/aesni/aes256-decrypt.asm
new file mode 100644
index 00000000..122f1db6
--- /dev/null
+++ b/x86_64/aesni/aes256-decrypt.asm
@@ -0,0 +1,113 @@
+C x86_64/aesni/aes256-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`KEY13', `%xmm13')
+define(`KEY14', `%xmm14')
+define(`X', `%xmm15')
+
+ .file "aes256-decrypt.asm"
+
+ C nettle_aes256_decrypt(const struct aes256_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes256_decrypt)
+ W64_ENTRY(4, 16)
+ shr $4, LENGTH
+ test LENGTH, LENGTH
+ jz .Lend
+
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ movups 208(CTX), KEY13
+ movups 224(CTX), KEY14
+
+.Lblock_loop:
+ movups (SRC), X
+ pxor KEY0, X
+ aesdec KEY1, X
+ aesdec KEY2, X
+ aesdec KEY3, X
+ aesdec KEY4, X
+ aesdec KEY5, X
+ aesdec KEY6, X
+ aesdec KEY7, X
+ aesdec KEY8, X
+ aesdec KEY9, X
+ aesdec KEY10, X
+ aesdec KEY11, X
+ aesdec KEY12, X
+ aesdec KEY13, X
+ aesdeclast KEY14, X
+
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ dec LENGTH
+ jnz .Lblock_loop
+
+.Lend:
+ W64_EXIT(4, 16)
+ ret
+EPILOGUE(nettle_aes256_decrypt)
diff --git a/x86_64/aesni/aes256-encrypt.asm b/x86_64/aesni/aes256-encrypt.asm
new file mode 100644
index 00000000..b261a237
--- /dev/null
+++ b/x86_64/aesni/aes256-encrypt.asm
@@ -0,0 +1,113 @@
+C x86_64/aesni/aes256-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`KEY13', `%xmm13')
+define(`KEY14', `%xmm14')
+define(`X', `%xmm15')
+
+ .file "aes256-encrypt.asm"
+
+ C nettle_aes256_encrypt(const struct aes256_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes256_encrypt)
+ W64_ENTRY(4, 16)
+ shr $4, LENGTH
+ test LENGTH, LENGTH
+ jz .Lend
+
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ movups 208(CTX), KEY13
+ movups 224(CTX), KEY14
+
+.Lblock_loop:
+ movups (SRC), X
+ pxor KEY0, X
+ aesenc KEY1, X
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenc KEY10, X
+ aesenc KEY11, X
+ aesenc KEY12, X
+ aesenc KEY13, X
+ aesenclast KEY14, X
+
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ dec LENGTH
+ jnz .Lblock_loop
+
+.Lend:
+ W64_EXIT(4, 16)
+ ret
+EPILOGUE(nettle_aes256_encrypt)
diff --git a/x86_64/fat/aes-encrypt-internal-2.asm b/x86_64/fat/aes128-decrypt-2.asm
index c0a4f3d9..4015ee5a 100644
--- a/x86_64/fat/aes-encrypt-internal-2.asm
+++ b/x86_64/fat/aes128-decrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-encrypt-internal-2.asm
-
+C x86_64/fat/aes128-decrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_aesni')
-include_src(`x86_64/aesni/aes-encrypt-internal.asm')
+dnl PROLOGUE(nettle_aes128_decrypt) picked up by configure
+
+C Make fat_transform wrap its argument as _NAME_aesni, then include the
+C AES-NI source. The define must come before the include.
+C NOTE(review): presumably the included file applies fat_transform to its
+C symbol name, giving _nettle_aes128_decrypt_aesni; confirm in asm.m4.
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes128-decrypt.asm')
diff --git a/x86_64/fat/aes-encrypt-internal.asm b/x86_64/fat/aes128-encrypt-2.asm
index 0f7f0134..b49ddca6 100644
--- a/x86_64/fat/aes-encrypt-internal.asm
+++ b/x86_64/fat/aes128-encrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-encrypt-internal.asm
-
+C x86_64/fat/aes128-encrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/aes-encrypt-internal.asm')
+dnl PROLOGUE(nettle_aes128_encrypt) picked up by configure
+
+C Make fat_transform wrap its argument as _NAME_aesni, then include the
+C AES-NI source. The define must come before the include.
+C NOTE(review): presumably the included file applies fat_transform to its
+C symbol name, giving _nettle_aes128_encrypt_aesni; confirm in asm.m4.
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes128-encrypt.asm')
diff --git a/x86_64/fat/aes-decrypt-internal-2.asm b/x86_64/fat/aes192-decrypt-2.asm
index cd6a72e2..713610e1 100644
--- a/x86_64/fat/aes-decrypt-internal-2.asm
+++ b/x86_64/fat/aes192-decrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-decrypt-internal-2.asm
-
+C x86_64/fat/aes192-decrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_aesni')
-include_src(`x86_64/aesni/aes-decrypt-internal.asm')
+dnl PROLOGUE(nettle_aes192_decrypt) picked up by configure
+
+C Make fat_transform wrap its argument as _NAME_aesni, then include the
+C AES-NI source. The define must come before the include.
+C NOTE(review): presumably the included file applies fat_transform to its
+C symbol name, giving _nettle_aes192_decrypt_aesni; confirm in asm.m4.
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes192-decrypt.asm')
diff --git a/x86_64/fat/aes-decrypt-internal.asm b/x86_64/fat/aes192-encrypt-2.asm
index 4b9e8f16..ee2bbf56 100644
--- a/x86_64/fat/aes-decrypt-internal.asm
+++ b/x86_64/fat/aes192-encrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-decrypt-internal.asm
-
+C x86_64/fat/aes192-encrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/aes-decrypt-internal.asm')
+dnl PROLOGUE(nettle_aes192_encrypt) picked up by configure
+
+C Make fat_transform wrap its argument as _NAME_aesni, then include the
+C AES-NI source. The define must come before the include.
+C NOTE(review): presumably the included file applies fat_transform to its
+C symbol name, giving _nettle_aes192_encrypt_aesni; confirm in asm.m4.
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes192-encrypt.asm')
diff --git a/x86_64/fat/aes256-decrypt-2.asm b/x86_64/fat/aes256-decrypt-2.asm
new file mode 100644
index 00000000..d596b257
--- /dev/null
+++ b/x86_64/fat/aes256-decrypt-2.asm
@@ -0,0 +1,36 @@
+C x86_64/fat/aes256-decrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_decrypt) picked up by configure
+
+C Make fat_transform wrap its argument as _NAME_aesni, then include the
+C AES-NI source. The define must come before the include.
+C NOTE(review): presumably the included file applies fat_transform to its
+C symbol name, giving _nettle_aes256_decrypt_aesni; confirm in asm.m4.
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes256-decrypt.asm')
diff --git a/x86_64/fat/aes256-encrypt-2.asm b/x86_64/fat/aes256-encrypt-2.asm
new file mode 100644
index 00000000..9aa3c13e
--- /dev/null
+++ b/x86_64/fat/aes256-encrypt-2.asm
@@ -0,0 +1,36 @@
+C x86_64/fat/aes256-encrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_encrypt) picked up by configure
+
+C Make fat_transform wrap its argument as _NAME_aesni, then include the
+C AES-NI source. The define must come before the include.
+C NOTE(review): presumably the included file applies fat_transform to its
+C symbol name, giving _nettle_aes256_encrypt_aesni; confirm in asm.m4.
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes256-encrypt.asm')