summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2023-04-02 20:20:50 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2023-04-03 21:36:50 +0300
commit278ba98497e126358a6b0ee8b566cc62ebb96ab9 (patch)
treef1df9262ee5a2f033ef52e4663d06b8834d0b927
parent3660935d2d5053584f2c9349c6ff454c51c8bbfc (diff)
downloadlibgcrypt-278ba98497e126358a6b0ee8b566cc62ebb96ab9.tar.gz
cipher-gcm-ppc: tweak loop structure a bit
* cipher/cipher-gcm-ppc.c (_gcry_ghash_ppc_vpmsum): Increment 'buf' pointer right after use; Use 'for' loop for inner 4-blocks loop to allow compiler to better optimize loop. -- Benchmark on POWER9: Before: | nanosecs/byte mebibytes/sec cycles/byte GMAC_AES | 0.226 ns/B 4211 MiB/s 0.521 c/B After: | nanosecs/byte mebibytes/sec cycles/byte GMAC_AES | 0.224 ns/B 4251 MiB/s 0.516 c/B Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r--cipher/cipher-gcm-ppc.c9
1 files changed, 3 insertions, 6 deletions
diff --git a/cipher/cipher-gcm-ppc.c b/cipher/cipher-gcm-ppc.c
index 4f75e95c..550864c1 100644
--- a/cipher/cipher-gcm-ppc.c
+++ b/cipher/cipher-gcm-ppc.c
@@ -437,6 +437,7 @@ _gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table,
in1 = vec_load_he (16, buf);
in2 = vec_load_he (32, buf);
in3 = vec_load_he (48, buf);
+ buf += 64;
in0 = vec_be_swap(in0, bswap_const);
in1 = vec_be_swap(in1, bswap_const);
in2 = vec_be_swap(in2, bswap_const);
@@ -464,17 +465,13 @@ _gcry_ghash_ppc_vpmsum (byte *result, void *gcm_table,
Xh3 = asm_xor (Xh3, Xh1);
/* Gerald Estrin's scheme for parallel multiplication of polynomials */
- while (1)
+ for (; blocks_remaining > 4; blocks_remaining -= 4)
{
- buf += 64;
- blocks_remaining -= 4;
- if (!blocks_remaining)
- break;
-
in0 = vec_load_he (0, buf);
in1 = vec_load_he (16, buf);
in2 = vec_load_he (32, buf);
in3 = vec_load_he (48, buf);
+ buf += 64;
in1 = vec_be_swap(in1, bswap_const);
in2 = vec_be_swap(in2, bswap_const);
in3 = vec_be_swap(in3, bswap_const);