summary | refs | log | tree | commit | diff
path: root/x86_64
diff options
context:
space:
mode:
author: Niels Möller <nisse@lysator.liu.se> 2021-08-10 22:05:40 +0200
committer: Niels Möller <nisse@lysator.liu.se> 2021-08-10 22:05:40 +0200
commit: c7391e5cdb8a0afc05186d484bc9f752b8f0c074 (patch)
tree: f107d002f14a51005cd230d00a3c0d34d9ab464b /x86_64
parent: d351a828579f1ffd0a837d68ed3c7f1c7d808f38 (diff)
download: nettle-c7391e5cdb8a0afc05186d484bc9f752b8f0c074.tar.gz
x86_64: Refactor aesni assembly, with specific functions for each key size.
Diffstat (limited to 'x86_64')
-rw-r--r-- x86_64/aesni/aes-decrypt-internal.asm 134
-rw-r--r-- x86_64/aesni/aes-encrypt-internal.asm 134
-rw-r--r-- x86_64/aesni/aes128-decrypt.asm 136
-rw-r--r-- x86_64/aesni/aes128-encrypt.asm 136
-rw-r--r-- x86_64/aesni/aes192-decrypt.asm 146
-rw-r--r-- x86_64/aesni/aes192-encrypt.asm 146
-rw-r--r-- x86_64/aesni/aes256-decrypt.asm 113
-rw-r--r-- x86_64/aesni/aes256-encrypt.asm 113
-rw-r--r-- x86_64/fat/aes128-decrypt-2.asm (renamed from x86_64/fat/aes-encrypt-internal-2.asm) 11
-rw-r--r-- x86_64/fat/aes128-encrypt-2.asm (renamed from x86_64/fat/aes-encrypt-internal.asm) 11
-rw-r--r-- x86_64/fat/aes192-decrypt-2.asm (renamed from x86_64/fat/aes-decrypt-internal-2.asm) 11
-rw-r--r-- x86_64/fat/aes192-encrypt-2.asm (renamed from x86_64/fat/aes-decrypt-internal.asm) 11
-rw-r--r-- x86_64/fat/aes256-decrypt-2.asm 36
-rw-r--r-- x86_64/fat/aes256-encrypt-2.asm 36
14 files changed, 886 insertions, 288 deletions
diff --git a/x86_64/aesni/aes-decrypt-internal.asm b/x86_64/aesni/aes-decrypt-internal.asm
deleted file mode 100644
index ee960260..00000000
--- a/x86_64/aesni/aes-decrypt-internal.asm
+++ /dev/null
@@ -1,134 +0,0 @@
-C x86_64/aesni/aes-decrypt-internal.asm
-
-
-ifelse(`
- Copyright (C) 2015, 2018 Niels Möller
-
- This file is part of GNU Nettle.
-
- GNU Nettle is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
-
- GNU Nettle is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see http://www.gnu.org/licenses/.
-')
-
-C Input argument
-define(`ROUNDS', `%rdi')
-define(`KEYS', `%rsi')
-C define(`TABLE', `%rdx') C Unused here
-define(`LENGTH',`%rcx')
-define(`DST', `%r8')
-define(`SRC', `%r9')
-
-define(`KEY0', `%xmm0')
-define(`KEY1', `%xmm1')
-define(`KEY2', `%xmm2')
-define(`KEY3', `%xmm3')
-define(`KEY4', `%xmm4')
-define(`KEY5', `%xmm5')
-define(`KEY6', `%xmm6')
-define(`KEY7', `%xmm7')
-define(`KEY8', `%xmm8')
-define(`KEY9', `%xmm9')
-define(`KEY10', `%xmm10')
-define(`KEY11', `%xmm11')
-define(`KEY12', `%xmm12')
-define(`KEY13', `%xmm13')
-define(`KEYLAST', `%xmm14')
-define(`BLOCK', `%xmm15')
-
- .file "aes-decrypt-internal.asm"
-
- C _aes_decrypt(unsigned rounds, const uint32_t *keys,
- C const struct aes_table *T,
- C size_t length, uint8_t *dst,
- C uint8_t *src)
- .text
- ALIGN(16)
-PROLOGUE(_nettle_aes_decrypt)
- W64_ENTRY(6, 16)
- shr $4, LENGTH
- test LENGTH, LENGTH
- jz .Lend
-
- movups (KEYS), KEY0
- movups 16(KEYS), KEY1
- movups 32(KEYS), KEY2
- movups 48(KEYS), KEY3
- movups 64(KEYS), KEY4
- movups 80(KEYS), KEY5
- movups 96(KEYS), KEY6
- movups 112(KEYS), KEY7
- movups 128(KEYS), KEY8
- movups 144(KEYS), KEY9
- lea 160(KEYS), KEYS
- sub $10, XREG(ROUNDS) C Also clears high half
- je .Lkey_last
-
- movups (KEYS), KEY10
- movups 16(KEYS), KEY11
- lea (KEYS, ROUNDS, 8), KEYS
- lea (KEYS, ROUNDS, 8), KEYS
-
- cmpl $2, XREG(ROUNDS)
- je .Lkey_last
- movups -32(KEYS), KEY12
- movups -16(KEYS), KEY13
-
-.Lkey_last:
- movups (KEYS), KEYLAST
-
-.Lblock_loop:
- movups (SRC), BLOCK
- pxor KEY0, BLOCK
- aesdec KEY1, BLOCK
- aesdec KEY2, BLOCK
- aesdec KEY3, BLOCK
- aesdec KEY4, BLOCK
- aesdec KEY5, BLOCK
- aesdec KEY6, BLOCK
- aesdec KEY7, BLOCK
- aesdec KEY8, BLOCK
- aesdec KEY9, BLOCK
- testl XREG(ROUNDS), XREG(ROUNDS)
- je .Lblock_end
- aesdec KEY10, BLOCK
- aesdec KEY11, BLOCK
- cmpl $2, XREG(ROUNDS)
- je .Lblock_end
-
- aesdec KEY12, BLOCK
- aesdec KEY13, BLOCK
-
-.Lblock_end:
- aesdeclast KEYLAST, BLOCK
-
- movups BLOCK, (DST)
- add $16, SRC
- add $16, DST
- dec LENGTH
- jnz .Lblock_loop
-
-.Lend:
- W64_EXIT(6, 16)
- ret
-EPILOGUE(_nettle_aes_decrypt)
diff --git a/x86_64/aesni/aes-encrypt-internal.asm b/x86_64/aesni/aes-encrypt-internal.asm
deleted file mode 100644
index 36ed6df6..00000000
--- a/x86_64/aesni/aes-encrypt-internal.asm
+++ /dev/null
@@ -1,134 +0,0 @@
-C x86_64/aesni/aes-encrypt-internal.asm
-
-
-ifelse(`
- Copyright (C) 2015, 2018 Niels Möller
-
- This file is part of GNU Nettle.
-
- GNU Nettle is free software: you can redistribute it and/or
- modify it under the terms of either:
-
- * the GNU Lesser General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your
- option) any later version.
-
- or
-
- * the GNU General Public License as published by the Free
- Software Foundation; either version 2 of the License, or (at your
- option) any later version.
-
- or both in parallel, as here.
-
- GNU Nettle is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
- General Public License for more details.
-
- You should have received copies of the GNU General Public License and
- the GNU Lesser General Public License along with this program. If
- not, see http://www.gnu.org/licenses/.
-')
-
-C Input argument
-define(`ROUNDS', `%rdi')
-define(`KEYS', `%rsi')
-C define(`TABLE', `%rdx') C Unused here
-define(`LENGTH',`%rcx')
-define(`DST', `%r8')
-define(`SRC', `%r9')
-
-define(`KEY0', `%xmm0')
-define(`KEY1', `%xmm1')
-define(`KEY2', `%xmm2')
-define(`KEY3', `%xmm3')
-define(`KEY4', `%xmm4')
-define(`KEY5', `%xmm5')
-define(`KEY6', `%xmm6')
-define(`KEY7', `%xmm7')
-define(`KEY8', `%xmm8')
-define(`KEY9', `%xmm9')
-define(`KEY10', `%xmm10')
-define(`KEY11', `%xmm11')
-define(`KEY12', `%xmm12')
-define(`KEY13', `%xmm13')
-define(`KEYLAST', `%xmm14')
-define(`BLOCK', `%xmm15')
-
- .file "aes-encrypt-internal.asm"
-
- C _aes_encrypt(unsigned rounds, const uint32_t *keys,
- C const struct aes_table *T,
- C size_t length, uint8_t *dst,
- C uint8_t *src)
- .text
- ALIGN(16)
-PROLOGUE(_nettle_aes_encrypt)
- W64_ENTRY(6, 16)
- shr $4, LENGTH
- test LENGTH, LENGTH
- jz .Lend
-
- movups (KEYS), KEY0
- movups 16(KEYS), KEY1
- movups 32(KEYS), KEY2
- movups 48(KEYS), KEY3
- movups 64(KEYS), KEY4
- movups 80(KEYS), KEY5
- movups 96(KEYS), KEY6
- movups 112(KEYS), KEY7
- movups 128(KEYS), KEY8
- movups 144(KEYS), KEY9
- lea 160(KEYS), KEYS
- sub $10, XREG(ROUNDS) C Also clears high half
- je .Lkey_last
-
- movups (KEYS), KEY10
- movups 16(KEYS), KEY11
- lea (KEYS, ROUNDS, 8), KEYS
- lea (KEYS, ROUNDS, 8), KEYS
-
- cmpl $2, XREG(ROUNDS)
- je .Lkey_last
- movups -32(KEYS), KEY12
- movups -16(KEYS), KEY13
-
-.Lkey_last:
- movups (KEYS), KEYLAST
-
-.Lblock_loop:
- movups (SRC), BLOCK
- pxor KEY0, BLOCK
- aesenc KEY1, BLOCK
- aesenc KEY2, BLOCK
- aesenc KEY3, BLOCK
- aesenc KEY4, BLOCK
- aesenc KEY5, BLOCK
- aesenc KEY6, BLOCK
- aesenc KEY7, BLOCK
- aesenc KEY8, BLOCK
- aesenc KEY9, BLOCK
- testl XREG(ROUNDS), XREG(ROUNDS)
- je .Lblock_end
- aesenc KEY10, BLOCK
- aesenc KEY11, BLOCK
- cmpl $2, XREG(ROUNDS)
- je .Lblock_end
-
- aesenc KEY12, BLOCK
- aesenc KEY13, BLOCK
-
-.Lblock_end:
- aesenclast KEYLAST, BLOCK
-
- movups BLOCK, (DST)
- add $16, SRC
- add $16, DST
- dec LENGTH
- jnz .Lblock_loop
-
-.Lend:
- W64_EXIT(6, 16)
- ret
-EPILOGUE(_nettle_aes_encrypt)
diff --git a/x86_64/aesni/aes128-decrypt.asm b/x86_64/aesni/aes128-decrypt.asm
new file mode 100644
index 00000000..79111e47
--- /dev/null
+++ b/x86_64/aesni/aes128-decrypt.asm
@@ -0,0 +1,136 @@
+C x86_64/aesni/aes128-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`X', `%xmm11')
+define(`Y', `%xmm12')
+
+ .file "aes128-decrypt.asm"
+
+ C nettle_aes128_decrypt(const struct aes128_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C Runs aesdec/aesdeclast with the subkeys at ctx over each whole 16-byte block of src, storing to dst.
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes128_decrypt)
+ W64_ENTRY(4, 13)
+ shr $4, LENGTH C Byte count -> number of whole 16-byte blocks
+ test LENGTH, LENGTH
+ jz .Lend C No complete block: nothing to do
+ C Load the eleven 16-byte subkeys found at CTX offsets 0, 16, ..., 160.
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ shr LENGTH C LENGTH = number of block pairs; carry set if the block count was odd
+ jnc .Lblock_loop C Even (non-zero) count: only pairs to process
+ C Odd block count: handle one block alone before entering the pair loop.
+ movups (SRC), X
+ pxor KEY0, X C Initial key xor
+ aesdec KEY1, X C Nine aesdec rounds, KEY1..KEY9
+ aesdec KEY2, X
+ aesdec KEY3, X
+ aesdec KEY4, X
+ aesdec KEY5, X
+ aesdec KEY6, X
+ aesdec KEY7, X
+ aesdec KEY8, X
+ aesdec KEY9, X
+ aesdeclast KEY10, X C Final round
+ C Store the block and advance both pointers by 16 bytes.
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH C The adds overwrote the flags from shr, so test the pair count again
+ jz .Lend C The single block was the only one
+ C Pair loop: X and Y carry two blocks through the same key sequence, instructions interleaved.
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X C Initial key xor for both blocks
+ pxor KEY0, Y
+ aesdec KEY1, X C NOTE(review): interleaving presumably overlaps aesdec latency - confirm
+ aesdec KEY1, Y
+ aesdec KEY2, X
+ aesdec KEY2, Y
+ aesdec KEY3, X
+ aesdec KEY3, Y
+ aesdec KEY4, X
+ aesdec KEY4, Y
+ aesdec KEY5, X
+ aesdec KEY5, Y
+ aesdec KEY6, X
+ aesdec KEY6, Y
+ aesdec KEY7, X
+ aesdec KEY7, Y
+ aesdec KEY8, X
+ aesdec KEY8, Y
+ aesdec KEY9, X
+ aesdec KEY9, Y
+ aesdeclast KEY10, X C Final round for both blocks
+ aesdeclast KEY10, Y
+ C Store both blocks and advance the pointers by 32 bytes.
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH C One pair done
+ jnz .Lblock_loop
+ C Common exit for the empty, single-block and loop-finished cases.
+.Lend:
+ W64_EXIT(4, 13)
+ ret
+EPILOGUE(nettle_aes128_decrypt)
diff --git a/x86_64/aesni/aes128-encrypt.asm b/x86_64/aesni/aes128-encrypt.asm
new file mode 100644
index 00000000..8e7ebe78
--- /dev/null
+++ b/x86_64/aesni/aes128-encrypt.asm
@@ -0,0 +1,136 @@
+C x86_64/aesni/aes128-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`X', `%xmm11')
+define(`Y', `%xmm12')
+
+ .file "aes128-encrypt.asm"
+
+ C nettle_aes128_encrypt(const struct aes128_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C Runs aesenc/aesenclast with the subkeys at ctx over each whole 16-byte block of src, storing to dst.
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes128_encrypt)
+ W64_ENTRY(4, 13)
+ shr $4, LENGTH C Byte count -> number of whole 16-byte blocks
+ test LENGTH, LENGTH
+ jz .Lend C No complete block: nothing to do
+ C Load the eleven 16-byte subkeys found at CTX offsets 0, 16, ..., 160.
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ shr LENGTH C LENGTH = number of block pairs; carry set if the block count was odd
+ jnc .Lblock_loop C Even (non-zero) count: only pairs to process
+ C Odd block count: handle one block alone before entering the pair loop.
+ movups (SRC), X
+ pxor KEY0, X C Initial key xor
+ aesenc KEY1, X C Nine aesenc rounds, KEY1..KEY9
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenclast KEY10, X C Final round
+ C Store the block and advance both pointers by 16 bytes.
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH C The adds overwrote the flags from shr, so test the pair count again
+ jz .Lend C The single block was the only one
+ C Pair loop: X and Y carry two blocks through the same key sequence, instructions interleaved.
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X C Initial key xor for both blocks
+ pxor KEY0, Y
+ aesenc KEY1, X C NOTE(review): interleaving presumably overlaps aesenc latency - confirm
+ aesenc KEY1, Y
+ aesenc KEY2, X
+ aesenc KEY2, Y
+ aesenc KEY3, X
+ aesenc KEY3, Y
+ aesenc KEY4, X
+ aesenc KEY4, Y
+ aesenc KEY5, X
+ aesenc KEY5, Y
+ aesenc KEY6, X
+ aesenc KEY6, Y
+ aesenc KEY7, X
+ aesenc KEY7, Y
+ aesenc KEY8, X
+ aesenc KEY8, Y
+ aesenc KEY9, X
+ aesenc KEY9, Y
+ aesenclast KEY10, X C Final round for both blocks
+ aesenclast KEY10, Y
+ C Store both blocks and advance the pointers by 32 bytes.
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH C One pair done
+ jnz .Lblock_loop
+ C Common exit for the empty, single-block and loop-finished cases.
+.Lend:
+ W64_EXIT(4, 13)
+ ret
+EPILOGUE(nettle_aes128_encrypt)
diff --git a/x86_64/aesni/aes192-decrypt.asm b/x86_64/aesni/aes192-decrypt.asm
new file mode 100644
index 00000000..399f89b6
--- /dev/null
+++ b/x86_64/aesni/aes192-decrypt.asm
@@ -0,0 +1,146 @@
+C x86_64/aesni/aes192-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`X', `%xmm13')
+define(`Y', `%xmm14')
+
+ .file "aes192-decrypt.asm"
+
+ C nettle_aes192_decrypt(const struct aes192_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C Runs aesdec/aesdeclast with the subkeys at ctx over each whole 16-byte block of src, storing to dst.
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes192_decrypt)
+ W64_ENTRY(4, 15)
+ shr $4, LENGTH C Byte count -> number of whole 16-byte blocks
+ test LENGTH, LENGTH
+ jz .Lend C No complete block: nothing to do
+ C Load the thirteen 16-byte subkeys found at CTX offsets 0, 16, ..., 192.
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ shr LENGTH C LENGTH = number of block pairs; carry set if the block count was odd
+ jnc .Lblock_loop C Even (non-zero) count: only pairs to process
+ C Odd block count: handle one block alone before entering the pair loop.
+ movups (SRC), X
+ pxor KEY0, X C Initial key xor
+ aesdec KEY1, X C Eleven aesdec rounds, KEY1..KEY11
+ aesdec KEY2, X
+ aesdec KEY3, X
+ aesdec KEY4, X
+ aesdec KEY5, X
+ aesdec KEY6, X
+ aesdec KEY7, X
+ aesdec KEY8, X
+ aesdec KEY9, X
+ aesdec KEY10, X
+ aesdec KEY11, X
+ aesdeclast KEY12, X C Final round
+ C Store the block and advance both pointers by 16 bytes.
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH C The adds overwrote the flags from shr, so test the pair count again
+ jz .Lend C The single block was the only one
+ C Pair loop: X and Y carry two blocks through the same key sequence, instructions interleaved.
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X C Initial key xor for both blocks
+ pxor KEY0, Y
+ aesdec KEY1, X C NOTE(review): interleaving presumably overlaps aesdec latency - confirm
+ aesdec KEY1, Y
+ aesdec KEY2, X
+ aesdec KEY2, Y
+ aesdec KEY3, X
+ aesdec KEY3, Y
+ aesdec KEY4, X
+ aesdec KEY4, Y
+ aesdec KEY5, X
+ aesdec KEY5, Y
+ aesdec KEY6, X
+ aesdec KEY6, Y
+ aesdec KEY7, X
+ aesdec KEY7, Y
+ aesdec KEY8, X
+ aesdec KEY8, Y
+ aesdec KEY9, X
+ aesdec KEY9, Y
+ aesdec KEY10, X
+ aesdec KEY10, Y
+ aesdec KEY11, X
+ aesdec KEY11, Y
+ aesdeclast KEY12, X C Final round for both blocks
+ aesdeclast KEY12, Y
+ C Store both blocks and advance the pointers by 32 bytes.
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH C One pair done
+ jnz .Lblock_loop
+ C Common exit for the empty, single-block and loop-finished cases.
+.Lend:
+ W64_EXIT(4, 15)
+ ret
+EPILOGUE(nettle_aes192_decrypt)
diff --git a/x86_64/aesni/aes192-encrypt.asm b/x86_64/aesni/aes192-encrypt.asm
new file mode 100644
index 00000000..67271b83
--- /dev/null
+++ b/x86_64/aesni/aes192-encrypt.asm
@@ -0,0 +1,146 @@
+C x86_64/aesni/aes192-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`X', `%xmm13')
+define(`Y', `%xmm14')
+
+ .file "aes192-encrypt.asm"
+
+ C nettle_aes192_encrypt(const struct aes192_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C Runs aesenc/aesenclast with the subkeys at ctx over each whole 16-byte block of src, storing to dst.
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes192_encrypt)
+ W64_ENTRY(4, 15)
+ shr $4, LENGTH C Byte count -> number of whole 16-byte blocks
+ test LENGTH, LENGTH
+ jz .Lend C No complete block: nothing to do
+ C Load the thirteen 16-byte subkeys found at CTX offsets 0, 16, ..., 192.
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ shr LENGTH C LENGTH = number of block pairs; carry set if the block count was odd
+ jnc .Lblock_loop C Even (non-zero) count: only pairs to process
+ C Odd block count: handle one block alone before entering the pair loop.
+ movups (SRC), X
+ pxor KEY0, X C Initial key xor
+ aesenc KEY1, X C Eleven aesenc rounds, KEY1..KEY11
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenc KEY10, X
+ aesenc KEY11, X
+ aesenclast KEY12, X C Final round
+ C Store the block and advance both pointers by 16 bytes.
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ test LENGTH, LENGTH C The adds overwrote the flags from shr, so test the pair count again
+ jz .Lend C The single block was the only one
+ C Pair loop: X and Y carry two blocks through the same key sequence, instructions interleaved.
+.Lblock_loop:
+ movups (SRC), X
+ movups 16(SRC), Y
+ pxor KEY0, X C Initial key xor for both blocks
+ pxor KEY0, Y
+ aesenc KEY1, X C NOTE(review): interleaving presumably overlaps aesenc latency - confirm
+ aesenc KEY1, Y
+ aesenc KEY2, X
+ aesenc KEY2, Y
+ aesenc KEY3, X
+ aesenc KEY3, Y
+ aesenc KEY4, X
+ aesenc KEY4, Y
+ aesenc KEY5, X
+ aesenc KEY5, Y
+ aesenc KEY6, X
+ aesenc KEY6, Y
+ aesenc KEY7, X
+ aesenc KEY7, Y
+ aesenc KEY8, X
+ aesenc KEY8, Y
+ aesenc KEY9, X
+ aesenc KEY9, Y
+ aesenc KEY10, X
+ aesenc KEY10, Y
+ aesenc KEY11, X
+ aesenc KEY11, Y
+ aesenclast KEY12, X C Final round for both blocks
+ aesenclast KEY12, Y
+ C Store both blocks and advance the pointers by 32 bytes.
+ movups X, (DST)
+ movups Y, 16(DST)
+ add $32, SRC
+ add $32, DST
+ dec LENGTH C One pair done
+ jnz .Lblock_loop
+ C Common exit for the empty, single-block and loop-finished cases.
+.Lend:
+ W64_EXIT(4, 15)
+ ret
+EPILOGUE(nettle_aes192_encrypt)
diff --git a/x86_64/aesni/aes256-decrypt.asm b/x86_64/aesni/aes256-decrypt.asm
new file mode 100644
index 00000000..122f1db6
--- /dev/null
+++ b/x86_64/aesni/aes256-decrypt.asm
@@ -0,0 +1,113 @@
+C x86_64/aesni/aes256-decrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`KEY13', `%xmm13')
+define(`KEY14', `%xmm14')
+define(`X', `%xmm15')
+
+ .file "aes256-decrypt.asm"
+
+ C nettle_aes256_decrypt(const struct aes256_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C Runs aesdec/aesdeclast with the subkeys at ctx over each whole 16-byte block of src, storing to dst.
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes256_decrypt)
+ W64_ENTRY(4, 16)
+ shr $4, LENGTH C Byte count -> number of whole 16-byte blocks
+ test LENGTH, LENGTH
+ jz .Lend C No complete block: nothing to do
+ C Load the fifteen 16-byte subkeys found at CTX offsets 0, 16, ..., 224.
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ movups 208(CTX), KEY13
+ movups 224(CTX), KEY14
+ C One block per iteration: KEY0..KEY14 fill %xmm0-%xmm14, leaving only X (%xmm15) for data.
+.Lblock_loop:
+ movups (SRC), X
+ pxor KEY0, X C Initial key xor
+ aesdec KEY1, X C Thirteen aesdec rounds, KEY1..KEY13
+ aesdec KEY2, X
+ aesdec KEY3, X
+ aesdec KEY4, X
+ aesdec KEY5, X
+ aesdec KEY6, X
+ aesdec KEY7, X
+ aesdec KEY8, X
+ aesdec KEY9, X
+ aesdec KEY10, X
+ aesdec KEY11, X
+ aesdec KEY12, X
+ aesdec KEY13, X
+ aesdeclast KEY14, X C Final round
+ C Store the block and advance both pointers by 16 bytes.
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ dec LENGTH C One block done
+ jnz .Lblock_loop
+ C Common exit for the empty and loop-finished cases.
+.Lend:
+ W64_EXIT(4, 16)
+ ret
+EPILOGUE(nettle_aes256_decrypt)
diff --git a/x86_64/aesni/aes256-encrypt.asm b/x86_64/aesni/aes256-encrypt.asm
new file mode 100644
index 00000000..b261a237
--- /dev/null
+++ b/x86_64/aesni/aes256-encrypt.asm
@@ -0,0 +1,113 @@
+C x86_64/aesni/aes256-encrypt.asm
+
+ifelse(`
+ Copyright (C) 2015, 2018, 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+C Input arguments
+define(`CTX', `%rdi')
+define(`LENGTH',`%rsi')
+define(`DST', `%rdx')
+define(`SRC', `%rcx')
+
+define(`KEY0', `%xmm0')
+define(`KEY1', `%xmm1')
+define(`KEY2', `%xmm2')
+define(`KEY3', `%xmm3')
+define(`KEY4', `%xmm4')
+define(`KEY5', `%xmm5')
+define(`KEY6', `%xmm6')
+define(`KEY7', `%xmm7')
+define(`KEY8', `%xmm8')
+define(`KEY9', `%xmm9')
+define(`KEY10', `%xmm10')
+define(`KEY11', `%xmm11')
+define(`KEY12', `%xmm12')
+define(`KEY13', `%xmm13')
+define(`KEY14', `%xmm14')
+define(`X', `%xmm15')
+
+ .file "aes256-encrypt.asm"
+
+ C nettle_aes256_encrypt(const struct aes256_ctx *ctx,
+ C size_t length, uint8_t *dst,
+ C const uint8_t *src);
+ C Runs aesenc/aesenclast with the subkeys at ctx over each whole 16-byte block of src, storing to dst.
+ .text
+ ALIGN(16)
+PROLOGUE(nettle_aes256_encrypt)
+ W64_ENTRY(4, 16)
+ shr $4, LENGTH C Byte count -> number of whole 16-byte blocks
+ test LENGTH, LENGTH
+ jz .Lend C No complete block: nothing to do
+ C Load the fifteen 16-byte subkeys found at CTX offsets 0, 16, ..., 224.
+ movups (CTX), KEY0
+ movups 16(CTX), KEY1
+ movups 32(CTX), KEY2
+ movups 48(CTX), KEY3
+ movups 64(CTX), KEY4
+ movups 80(CTX), KEY5
+ movups 96(CTX), KEY6
+ movups 112(CTX), KEY7
+ movups 128(CTX), KEY8
+ movups 144(CTX), KEY9
+ movups 160(CTX), KEY10
+ movups 176(CTX), KEY11
+ movups 192(CTX), KEY12
+ movups 208(CTX), KEY13
+ movups 224(CTX), KEY14
+ C One block per iteration: KEY0..KEY14 fill %xmm0-%xmm14, leaving only X (%xmm15) for data.
+.Lblock_loop:
+ movups (SRC), X
+ pxor KEY0, X C Initial key xor
+ aesenc KEY1, X C Thirteen aesenc rounds, KEY1..KEY13
+ aesenc KEY2, X
+ aesenc KEY3, X
+ aesenc KEY4, X
+ aesenc KEY5, X
+ aesenc KEY6, X
+ aesenc KEY7, X
+ aesenc KEY8, X
+ aesenc KEY9, X
+ aesenc KEY10, X
+ aesenc KEY11, X
+ aesenc KEY12, X
+ aesenc KEY13, X
+ aesenclast KEY14, X C Final round
+ C Store the block and advance both pointers by 16 bytes.
+ movups X, (DST)
+ add $16, SRC
+ add $16, DST
+ dec LENGTH C One block done
+ jnz .Lblock_loop
+ C Common exit for the empty and loop-finished cases.
+.Lend:
+ W64_EXIT(4, 16)
+ ret
+EPILOGUE(nettle_aes256_encrypt)
diff --git a/x86_64/fat/aes-encrypt-internal-2.asm b/x86_64/fat/aes128-decrypt-2.asm
index c0a4f3d9..4015ee5a 100644
--- a/x86_64/fat/aes-encrypt-internal-2.asm
+++ b/x86_64/fat/aes128-decrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-encrypt-internal-2.asm
-
+C x86_64/fat/aes128-decrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_aesni')
-include_src(`x86_64/aesni/aes-encrypt-internal.asm')
+dnl PROLOGUE(nettle_aes128_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes128-decrypt.asm')
diff --git a/x86_64/fat/aes-encrypt-internal.asm b/x86_64/fat/aes128-encrypt-2.asm
index 0f7f0134..b49ddca6 100644
--- a/x86_64/fat/aes-encrypt-internal.asm
+++ b/x86_64/fat/aes128-encrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-encrypt-internal.asm
-
+C x86_64/fat/aes128-encrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/aes-encrypt-internal.asm')
+dnl PROLOGUE(nettle_aes128_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes128-encrypt.asm')
diff --git a/x86_64/fat/aes-decrypt-internal-2.asm b/x86_64/fat/aes192-decrypt-2.asm
index cd6a72e2..713610e1 100644
--- a/x86_64/fat/aes-decrypt-internal-2.asm
+++ b/x86_64/fat/aes192-decrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-decrypt-internal-2.asm
-
+C x86_64/fat/aes192-decrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_aesni')
-include_src(`x86_64/aesni/aes-decrypt-internal.asm')
+dnl PROLOGUE(nettle_aes192_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes192-decrypt.asm')
diff --git a/x86_64/fat/aes-decrypt-internal.asm b/x86_64/fat/aes192-encrypt-2.asm
index 4b9e8f16..ee2bbf56 100644
--- a/x86_64/fat/aes-decrypt-internal.asm
+++ b/x86_64/fat/aes192-encrypt-2.asm
@@ -1,8 +1,7 @@
-C x86_64/fat/aes-decrypt-internal.asm
-
+C x86_64/fat/aes192-encrypt-2.asm
ifelse(`
- Copyright (C) 2015 Niels Möller
+ Copyright (C) 2021 Niels Möller
This file is part of GNU Nettle.
@@ -31,5 +30,7 @@ ifelse(`
not, see http://www.gnu.org/licenses/.
')
-define(`fat_transform', `$1_x86_64')
-include_src(`x86_64/aes-decrypt-internal.asm')
+dnl PROLOGUE(nettle_aes192_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes192-encrypt.asm')
diff --git a/x86_64/fat/aes256-decrypt-2.asm b/x86_64/fat/aes256-decrypt-2.asm
new file mode 100644
index 00000000..d596b257
--- /dev/null
+++ b/x86_64/fat/aes256-decrypt-2.asm
@@ -0,0 +1,36 @@
+C x86_64/fat/aes256-decrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_decrypt) picked up by configure
+
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes256-decrypt.asm')
diff --git a/x86_64/fat/aes256-encrypt-2.asm b/x86_64/fat/aes256-encrypt-2.asm
new file mode 100644
index 00000000..9aa3c13e
--- /dev/null
+++ b/x86_64/fat/aes256-encrypt-2.asm
@@ -0,0 +1,36 @@
+C x86_64/fat/aes256-encrypt-2.asm
+
+ifelse(`
+ Copyright (C) 2021 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+dnl PROLOGUE(nettle_aes256_encrypt) picked up by configure
+
+define(`fat_transform', `_$1_aesni')
+include_src(`x86_64/aesni/aes256-encrypt.asm')