diff options
-rw-r--r-- | cipher/chacha20-ppc.c | 118 | ||||
-rw-r--r-- | cipher/chacha20.c | 55 |
2 files changed, 154 insertions, 19 deletions
diff --git a/cipher/chacha20-ppc.c b/cipher/chacha20-ppc.c index 4a21b837..3fe7bc8c 100644 --- a/cipher/chacha20-ppc.c +++ b/cipher/chacha20-ppc.c @@ -136,9 +136,8 @@ vec_add_ctr_u64(vector4x_u32 v, vector4x_u32 a) #define ADD_U64(v,a) \ (v = vec_add_ctr_u64(v, a)) -unsigned int ASM_FUNC_ATTR -_gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, - size_t nblks) +static unsigned int ASM_FUNC_ATTR_INLINE +chacha20_ppc_blocks1(u32 *state, byte *dst, const byte *src, size_t nblks) { vector4x_u32 counter_1 = { 1, 0, 0, 0 }; vector4x_u32 rotate_16 = { 16, 16, 16, 16 }; @@ -283,9 +282,8 @@ _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \ ROTATE(b1, rotate_7); ROTATE(b2, rotate_7); -unsigned int ASM_FUNC_ATTR -_gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src, - size_t nblks) +static unsigned int ASM_FUNC_ATTR_INLINE +chacha20_ppc_blocks4(u32 *state, byte *dst, const byte *src, size_t nblks) { vector4x_u32 counters_0123 = { 0, 1, 2, 3 }; vector4x_u32 counter_4 = { 4, 0, 0, 0 }; @@ -470,10 +468,10 @@ _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src, MUL_MOD_1305_64_PART2(h2, h1, h0, r1, r0, r1_mult5); \ } while (0) -unsigned int ASM_FUNC_ATTR -_gcry_chacha20_poly1305_ppc8_blocks4(u32 *state, byte *dst, const byte *src, - size_t nblks, POLY1305_STATE *st, - const byte *poly1305_src) +static unsigned int ASM_FUNC_ATTR_INLINE +chacha20_poly1305_ppc_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks, POLY1305_STATE *st, + const byte *poly1305_src) { vector4x_u32 counters_0123 = { 0, 1, 2, 3 }; vector4x_u32 counter_4 = { 4, 0, 0, 0 }; @@ -641,6 +639,106 @@ _gcry_chacha20_poly1305_ppc8_blocks4(u32 *state, byte *dst, const byte *src, return 0; } +#else + +static unsigned int ASM_FUNC_ATTR_INLINE +chacha20_poly1305_ppc_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks, POLY1305_STATE *st, + const byte *poly1305_src) +{ +} + #endif /* 
SIZEOF_UNSIGNED_LONG == 8 */ + +#ifdef HAVE_GCC_ATTRIBUTE_OPTIMIZE +# define FUNC_ATTR_OPT_O2 __attribute__((optimize("-O2"))) +#else +# define FUNC_ATTR_OPT_O2 +#endif + +#ifdef HAVE_GCC_ATTRIBUTE_PPC_TARGET +# define FUNC_ATTR_TARGET_P8 __attribute__((target("cpu=power8"))) +# define FUNC_ATTR_TARGET_P9 __attribute__((target("cpu=power9"))) +#else +# define FUNC_ATTR_TARGET_P8 +# define FUNC_ATTR_TARGET_P9 +#endif + + +/* Functions targeting POWER8. */ +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P8 FUNC_ATTR_OPT_O2 +_gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, + size_t nblks) +{ + return chacha20_ppc_blocks1(state, dst, src, nblks); +} + +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P8 FUNC_ATTR_OPT_O2 +_gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks) +{ + return chacha20_ppc_blocks4(state, dst, src, nblks); +} + +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P8 FUNC_ATTR_OPT_O2 +_gcry_chacha20_poly1305_ppc8_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks, POLY1305_STATE *st, + const byte *poly1305_src) +{ + return chacha20_poly1305_ppc_blocks4(state, dst, src, nblks, st, + poly1305_src); +} + +#ifdef HAVE_GCC_ATTRIBUTE_PPC_TARGET +/* Functions targeting POWER9. 
*/ +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P9 FUNC_ATTR_OPT_O2 +_gcry_chacha20_ppc9_blocks1(u32 *state, byte *dst, const byte *src, + size_t nblks) +{ + return chacha20_ppc_blocks1(state, dst, src, nblks); +} + +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P9 FUNC_ATTR_OPT_O2 +_gcry_chacha20_ppc9_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks) +{ + return chacha20_ppc_blocks4(state, dst, src, nblks); +} + +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P9 FUNC_ATTR_OPT_O2 +_gcry_chacha20_poly1305_ppc9_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks, POLY1305_STATE *st, + const byte *poly1305_src) +{ + return chacha20_poly1305_ppc_blocks4(state, dst, src, nblks, st, + poly1305_src); +} +#else +/* Compiler does not support target attribute, use same functions for POWER9 + * as for POWER8. */ +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P9 FUNC_ATTR_OPT_O2 +_gcry_chacha20_ppc9_blocks1(u32 *state, byte *dst, const byte *src, + size_t nblks) +{ + return _gcry_chacha20_ppc8_blocks1(state, dst, src, nblks); +} + +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P9 FUNC_ATTR_OPT_O2 +_gcry_chacha20_ppc9_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks) +{ + return _gcry_chacha20_ppc8_blocks4(state, dst, src, nblks); +} + +unsigned int ASM_FUNC_ATTR FUNC_ATTR_TARGET_P9 FUNC_ATTR_OPT_O2 +_gcry_chacha20_poly1305_ppc9_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks, POLY1305_STATE *st, + const byte *poly1305_src) +{ + return _gcry_chacha20_poly1305_ppc8_blocks4(state, dst, src, nblks, st, + poly1305_src); +} +#endif /* HAVE_GCC_ATTRIBUTE_PPC_TARGET */ + #endif /* ENABLE_PPC_CRYPTO_SUPPORT */ diff --git a/cipher/chacha20.c b/cipher/chacha20.c index a7e0dd63..d979d263 100644 --- a/cipher/chacha20.c +++ b/cipher/chacha20.c @@ -134,6 +134,7 @@ typedef struct CHACHA20_context_s unsigned int use_avx512:1; unsigned int use_neon:1; unsigned int use_ppc:1; + unsigned int use_p9:1; unsigned int use_p10:1; unsigned int use_s390x:1; } 
CHACHA20_context_t; @@ -195,12 +196,24 @@ unsigned int _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src, size_t nblks); +unsigned int _gcry_chacha20_ppc9_blocks4(u32 *state, byte *dst, + const byte *src, + size_t nblks); + +unsigned int _gcry_chacha20_ppc9_blocks1(u32 *state, byte *dst, + const byte *src, + size_t nblks); + #undef USE_PPC_VEC_POLY1305 #if SIZEOF_UNSIGNED_LONG == 8 #define USE_PPC_VEC_POLY1305 1 unsigned int _gcry_chacha20_poly1305_ppc8_blocks4( u32 *state, byte *dst, const byte *src, size_t nblks, POLY1305_STATE *st, const byte *poly1305_src); + +unsigned int _gcry_chacha20_poly1305_ppc9_blocks4( + u32 *state, byte *dst, const byte *src, size_t nblks, + POLY1305_STATE *st, const byte *poly1305_src); #endif /* SIZEOF_UNSIGNED_LONG == 8 */ #endif /* USE_PPC_VEC */ @@ -369,7 +382,10 @@ chacha20_blocks (CHACHA20_context_t *ctx, byte *dst, const byte *src, #ifdef USE_PPC_VEC if (ctx->use_ppc) { - return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks); + if (ctx->use_p9) + return _gcry_chacha20_ppc9_blocks1(ctx->input, dst, src, nblks); + else + return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks); } #endif @@ -509,6 +525,7 @@ chacha20_do_setkey (CHACHA20_context_t *ctx, #endif #ifdef USE_PPC_VEC ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0; + ctx->use_p9 = (features & HWF_PPC_ARCH_3_00) != 0; # ifndef WORDS_BIGENDIAN ctx->use_p10 = (features & HWF_PPC_ARCH_3_10) != 0; # ifdef ENABLE_FORCE_SOFT_HWFEATURES @@ -626,18 +643,25 @@ do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf, { size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; + if (0) + {} #ifndef WORDS_BIGENDIAN /* * A workaround to skip counter overflow. This is rare. 
*/ - if (ctx->use_p10 && nblocks >= 8 - && ((u64)ctx->input[12] + nblocks) <= 0xffffffffU) + else if (ctx->use_p10 && nblocks >= 8 + && ((u64)ctx->input[12] + nblocks) <= 0xffffffffU) { size_t len = nblocks * CHACHA20_BLOCK_SIZE; nburn = _gcry_chacha20_p10le_8x(ctx->input, outbuf, inbuf, len); } - else #endif + else if (ctx->use_p9) + { + nburn = _gcry_chacha20_ppc9_blocks4(ctx->input, outbuf, inbuf, + nblocks); + } + else { nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, nblocks); @@ -844,7 +868,10 @@ _gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf, } else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) { - nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4); + if (ctx->use_p9) + nburn = _gcry_chacha20_ppc9_blocks4(ctx->input, outbuf, inbuf, 4); + else + nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4); burn = nburn > burn ? nburn : burn; authptr = outbuf; @@ -986,7 +1013,12 @@ _gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf, size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; - nburn = _gcry_chacha20_poly1305_ppc8_blocks4( + if (ctx->use_p9) + nburn = _gcry_chacha20_poly1305_ppc9_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + else + nburn = _gcry_chacha20_poly1305_ppc8_blocks4( ctx->input, outbuf, inbuf, nblocks, &c->u_mode.poly1305.ctx.state, authptr); burn = nburn > burn ? 
nburn : burn; @@ -1212,9 +1244,14 @@ _gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf, size_t nblocks = length / CHACHA20_BLOCK_SIZE; nblocks -= nblocks % 4; - nburn = _gcry_chacha20_poly1305_ppc8_blocks4( - ctx->input, outbuf, inbuf, nblocks, - &c->u_mode.poly1305.ctx.state, inbuf); + if (ctx->use_p9) + nburn = _gcry_chacha20_poly1305_ppc9_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + else + nburn = _gcry_chacha20_poly1305_ppc8_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); burn = nburn > burn ? nburn : burn; length -= nblocks * CHACHA20_BLOCK_SIZE; |