diff options
author | Danny Tsen <dtsen@us.ibm.com> | 2022-06-12 21:30:19 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-06-12 22:02:48 +0300 |
commit | 88fe7ac33eb4cb4dff76a5cc7fca50da5fb0ee3a (patch) | |
tree | 395a466d420dab91a5f82c98bbe44f0f78bb4fb0 /cipher/chacha20.c | |
parent | 6d32bf80846a22568575a101a3fe6769ab058bb9 (diff) | |
download | libgcrypt-88fe7ac33eb4cb4dff76a5cc7fca50da5fb0ee3a.tar.gz |
Chacha20/poly1305 - Optimized chacha20/poly1305 for P10 operation
* configure.ac: Added chacha20 and poly1305 assembly implementations.
* cipher/chacha20-p10le-8x.s: (New) - support 8 blocks (512 bytes)
unrolling.
* cipher/poly1305-p10le.s: (New) - support 4 blocks (128 bytes)
unrolling.
* cipher/Makefile.am: Added new chacha20 and poly1305 files.
* cipher/chacha20.c: Added PPC p10 le support for 8x chacha20.
* cipher/poly1305.c: Added PPC p10 le support for 4x poly1305.
* cipher/poly1305-internal.h: Added PPC p10 le support for poly1305.
---
GnuPG-bug-id: 6006
Signed-off-by: Danny Tsen <dtsen@us.ibm.com>
[jk: cosmetic changes to C code]
[jk: fix building on ppc64be]
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/chacha20.c')
-rw-r--r-- | cipher/chacha20.c | 40 |
1 files changed, 39 insertions, 1 deletions
diff --git a/cipher/chacha20.c b/cipher/chacha20.c index 8dec4317..3518d95a 100644 --- a/cipher/chacha20.c +++ b/cipher/chacha20.c @@ -134,6 +134,7 @@ typedef struct CHACHA20_context_s unsigned int use_avx512:1; unsigned int use_neon:1; unsigned int use_ppc:1; + unsigned int use_p10:1; unsigned int use_s390x:1; } CHACHA20_context_t; @@ -180,6 +181,12 @@ unsigned int _gcry_chacha20_amd64_avx512_blocks16(u32 *state, byte *dst, #ifdef USE_PPC_VEC +#ifndef WORDS_BIGENDIAN +unsigned int _gcry_chacha20_p10le_8x(u32 *state, byte *dst, + const byte *src, + size_t len); +#endif + unsigned int _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks); @@ -495,6 +502,9 @@ chacha20_do_setkey (CHACHA20_context_t *ctx, #endif #ifdef USE_PPC_VEC ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0; +# ifndef WORDS_BIGENDIAN + ctx->use_p10 = (features & HWF_PPC_ARCH_3_10) != 0; +# endif #endif #ifdef USE_S390X_VX ctx->use_s390x = (features & HWF_S390X_VX) != 0; @@ -605,7 +615,22 @@ do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf, { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; - nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, nblocks); +#ifndef WORDS_BIGENDIAN + /* + * A workaround to skip counter overflow. This is rare. + */ + if (ctx->use_p10 && nblocks >= 8 + && ((u64)ctx->input[12] + nblocks) <= 0xffffffffU) + { + size_t len = nblocks * CHACHA20_BLOCK_SIZE; + nburn = _gcry_chacha20_p10le_8x(ctx->input, outbuf, inbuf, len); + } + else +#endif + { + nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, + nblocks); + } burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; outbuf += nblocks * CHACHA20_BLOCK_SIZE; @@ -801,6 +826,11 @@ _gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf, } #endif #ifdef USE_PPC_VEC_POLY1305 + else if (ctx->use_ppc && ctx->use_p10) + { + /* Skip stitched chacha20-poly1305 for P10. */ + authptr = NULL; + } else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) { nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4); @@ -1084,6 +1114,13 @@ _gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf, skip_stitched = 1; } #endif +#ifdef USE_PPC_VEC_POLY1305 + if (ctx->use_ppc && ctx->use_p10) + { + /* Skip stitched chacha20-poly1305 for P10. */ + skip_stitched = 1; + } +#endif #ifdef USE_AVX2 if (!skip_stitched && ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE) @@ -1154,6 +1191,7 @@ _gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf, #endif #ifdef USE_PPC_VEC_POLY1305 + /* skip stitch for p10 */ if (!skip_stitched && ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE) { size_t nblocks = length / CHACHA20_BLOCK_SIZE; |