summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNiels Möller <nisse@lysator.liu.se>2022-10-31 19:18:24 +0100
committerNiels Möller <nisse@lysator.liu.se>2022-10-31 19:18:24 +0100
commit57dce2ba98b2fc2dda44353e4eebdc3b7c932200 (patch)
tree8d536d722784e562e1299da53bc5a091acf13579
parentbb9c0a1ed4e6b3be4c5a259d3a6192960bc0c432 (diff)
downloadnettle-57dce2ba98b2fc2dda44353e4eebdc3b7c932200.tar.gz
x86_64: Implement _nettle_poly1305_blocks.
-rw-r--r--ChangeLog4
-rw-r--r--configure.ac3
-rw-r--r--x86_64/poly1305-blocks.asm128
3 files changed, 134 insertions, 1 deletions
diff --git a/ChangeLog b/ChangeLog
index 5aaa1b1c..0246a7dc 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
2022-10-31 Niels Möller <nisse@lysator.liu.se>
+ * configure.ac: (asm_file_list): Add HAVE_NATIVE_poly1305_blocks.
+ (asm_nettle_optional_list): Add poly1305-blocks.asm.
+ * x86_64/poly1305-blocks.asm: New file.
+
* md-internal.h (MD_FILL_OR_RETURN_INDEX): New macro.
* poly1305-update.c (_nettle_poly1305_update): New file and
function.
diff --git a/configure.ac b/configure.ac
index 59f68b00..4e9dceda 100644
--- a/configure.ac
+++ b/configure.ac
@@ -612,7 +612,7 @@ asm_nettle_optional_list="cpuid.asm cpu-facility.asm \
aes256-encrypt-2.asm aes256-decrypt-2.asm \
cbc-aes128-encrypt-2.asm cbc-aes192-encrypt-2.asm cbc-aes256-encrypt-2.asm \
chacha-2core.asm chacha-3core.asm chacha-4core.asm chacha-core-internal-2.asm \
- poly1305-internal-2.asm \
+ poly1305-blocks.asm poly1305-internal-2.asm \
ghash-set-key-2.asm ghash-update-2.asm \
salsa20-2core.asm salsa20-core-internal-2.asm \
sha1-compress-2.asm sha256-compress-n-2.asm \
@@ -762,6 +762,7 @@ AH_VERBATIM([HAVE_NATIVE],
#undef HAVE_NATIVE_poly1305_set_key
#undef HAVE_NATIVE_poly1305_block
#undef HAVE_NATIVE_poly1305_digest
+#undef HAVE_NATIVE_poly1305_blocks
#undef HAVE_NATIVE_ghash_set_key
#undef HAVE_NATIVE_ghash_update
#undef HAVE_NATIVE_salsa20_core
diff --git a/x86_64/poly1305-blocks.asm b/x86_64/poly1305-blocks.asm
new file mode 100644
index 00000000..63bfed3e
--- /dev/null
+++ b/x86_64/poly1305-blocks.asm
@@ -0,0 +1,128 @@
+C x86_64/poly1305-blocks.asm
+
+ifelse(`
+ Copyright (C) 2022 Niels Möller
+
+ This file is part of GNU Nettle.
+
+ GNU Nettle is free software: you can redistribute it and/or
+ modify it under the terms of either:
+
+ * the GNU Lesser General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your
+ option) any later version.
+
+ or
+
+ * the GNU General Public License as published by the Free
+ Software Foundation; either version 2 of the License, or (at your
+ option) any later version.
+
+ or both in parallel, as here.
+
+ GNU Nettle is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received copies of the GNU General Public License and
+ the GNU Lesser General Public License along with this program. If
+ not, see http://www.gnu.org/licenses/.
+')
+
+ .file "poly1305-blocks.asm"
+
+define(`CTX', `%rdi') C First argument to all functions
+define(`BLOCKS', `%rsi')
+define(`MP_PARAM', `%rdx') C Moved to MP, to not collide with mul instruction.
+
+define(`MP', `%r8') C May clobber, both with unix and windows conventions.
+define(`T0', `%rbx')
+define(`T1', `%rcx')
+define(`H0', `%rbp')
+define(`H1', `%r9')
+define(`H2', `%r10')
+define(`F0', `%r11')
+define(`F1', `%r12')
+
+C const uint8_t *
+C _nettle_poly1305_blocks (struct poly1305_ctx *ctx, size_t blocks, const uint8_t *m)
+
+PROLOGUE(_nettle_poly1305_blocks)
+ W64_ENTRY(3, 0)
+ mov MP_PARAM, MP
+ test BLOCKS, BLOCKS
+ jz .Lend
+
+ push %rbx
+ push %rbp
+ push %r12
+ mov P1305_H0 (CTX), H0
+ mov P1305_H1 (CTX), H1
+ mov P1305_H2 (CTX), H2
+ ALIGN(16)
+.Loop:
+ mov (MP), T0
+ mov 8(MP), T1
+ add $16, MP
+
+ add H0, T0
+ adc H1, T1
+ adc $1, H2
+
+ mov P1305_R1 (CTX), %rax
+ mul T0 C R1*T0
+ mov %rax, F0
+ mov %rdx, F1
+
+ mov T0, %rax C Last use of T0 input
+ mov P1305_R0 (CTX), T0
+ mul T0 C R0*T0
+ mov %rax, H0
+ mov %rdx, H1
+
+ mov T1, %rax
+ mul T0 C R0*T1
+ add %rax, F0
+ adc %rdx, F1
+
+ mov P1305_S1 (CTX), T0
+ mov T1, %rax C Last use of T1 input
+ mul T0 C S1*T1
+ add %rax, H0
+ adc %rdx, H1
+
+ mov H2, %rax
+ mul T0 C S1*H2
+ add %rax, F0
+ adc %rdx, F1
+
+ mov H2, T0
+ and $3, H2
+
+ shr $2, T0
+ mov P1305_S0 (CTX), %rax
+ mul T0 C S0*(H2 >> 2)
+ add %rax, H0
+ adc %rdx, H1
+
+ imul P1305_R0 (CTX), H2 C R0*(H2 & 3)
+ add F0, H1
+ adc F1, H2
+
+ dec BLOCKS
+ jnz .Loop
+
+ mov H0, P1305_H0 (CTX)
+ mov H1, P1305_H1 (CTX)
+ mov H2, P1305_H2 (CTX)
+
+ pop %r12
+ pop %rbp
+ pop %rbx
+
+.Lend:
+ mov MP, %rax
+ W64_EXIT(3, 0)
+ ret
+EPILOGUE(_nettle_poly1305_blocks)