summaryrefslogtreecommitdiff
path: root/stdlib
diff options
context:
space:
mode:
authorAdhemerval Zanella Netto <adhemerval.zanella@linaro.org>2022-07-21 10:05:02 -0300
committerAdhemerval Zanella <adhemerval.zanella@linaro.org>2022-07-22 11:58:27 -0300
commit4c128c7823e5a19058589cfac42aa96de3e15430 (patch)
treec214ca1a59aaa09294629a5b37d303bb639167b5 /stdlib
parent5d765ada01d140d8d1ecf94953a4751593af720d (diff)
downloadglibc-4c128c7823e5a19058589cfac42aa96de3e15430.tar.gz
aarch64: Add optimized chacha20
It adds a vectorized ChaCha20 implementation based on libgcrypt cipher/chacha20-aarch64.S. It is used as the default and only little-endian is supported (BE uses the generic code). As with the generic implementation, the last step that XORs with the input is omitted. The final state register clearing is also omitted. On a virtualized Linux on Apple M1 it shows the following improvements (using formatted bench-arc4random data): GENERIC MB/s ----------------------------------------------- arc4random [single-thread] 380.89 arc4random_buf(16) [single-thread] 500.73 arc4random_buf(32) [single-thread] 552.61 arc4random_buf(48) [single-thread] 566.82 arc4random_buf(64) [single-thread] 574.01 arc4random_buf(80) [single-thread] 581.02 arc4random_buf(96) [single-thread] 591.19 arc4random_buf(112) [single-thread] 592.29 arc4random_buf(128) [single-thread] 596.43 ----------------------------------------------- OPTIMIZED MB/s ----------------------------------------------- arc4random [single-thread] 569.60 arc4random_buf(16) [single-thread] 825.78 arc4random_buf(32) [single-thread] 987.03 arc4random_buf(48) [single-thread] 1042.39 arc4random_buf(64) [single-thread] 1075.50 arc4random_buf(80) [single-thread] 1094.68 arc4random_buf(96) [single-thread] 1130.16 arc4random_buf(112) [single-thread] 1129.58 arc4random_buf(128) [single-thread] 1137.91 ----------------------------------------------- Checked on aarch64-linux-gnu.
Diffstat (limited to 'stdlib')
-rw-r--r--stdlib/chacha20.c8
1 files changed, 6 insertions, 2 deletions
diff --git a/stdlib/chacha20.c b/stdlib/chacha20.c
index c47b8418f2..2745a81315 100644
--- a/stdlib/chacha20.c
+++ b/stdlib/chacha20.c
@@ -165,8 +165,9 @@ chacha20_block (uint32_t *state, uint8_t *dst, const uint8_t *src)
}
static void
-chacha20_crypt (uint32_t *state, uint8_t *dst, const uint8_t *src,
- size_t bytes)
+__attribute_maybe_unused__
+chacha20_crypt_generic (uint32_t *state, uint8_t *dst, const uint8_t *src,
+ size_t bytes)
{
while (bytes >= CHACHA20_BLOCK_SIZE)
{
@@ -185,3 +186,6 @@ chacha20_crypt (uint32_t *state, uint8_t *dst, const uint8_t *src,
explicit_bzero (stream, sizeof stream);
}
}
+
+/* Get the architecture optimized version. */
+#include <chacha20_arch.h>