diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2020-04-04 11:06:36 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2020-04-04 11:06:36 +0300 |
commit | 1250a9cd859d99f487ca8d76a98d70d464324bbe (patch) | |
tree | 4c3451a4d482c3160349e0fa84b328e3fbfdd014 /cipher/sha512-ppc.c | |
parent | 89b3ded8df969fe5fb31313c60419dd34d36b605 (diff) | |
download | libgcrypt-1250a9cd859d99f487ca8d76a98d70d464324bbe.tar.gz |
ppc: avoid using vec_vsx_ld/vec_vsx_st for 2x64-bit vectors
* cipher/crc-ppc.c (CRC_VEC_U64_LOAD, CRC_VEC_U64_LOAD_LE)
(CRC_VEC_U64_LOAD_BE): Remove vec_vsx_ld usage.
(asm_vec_u64_load, asm_vec_u64_load_le): New.
* cipher/sha512-ppc.c (vec_vshasigma_u64): Use '__asm__' instead of
'asm' for assembly block.
(vec_u64_load, vec_u64_store): New.
(_gcry_sha512_transform_ppc8): Use vec_u64_load/store instead of
vec_vsx_ld/vec_vsx_st.
* configure.ac (gcy_cv_cc_ppc_altivec)
(gcy_cv_cc_ppc_altivec_cflags): Add check for vec_vsx_ld with
'unsigned int *' pointer type.
--
GCC 7.5 and clang 8.0 do not support vec_vsx_ld with 'unsigned long long *'
pointer type. Switch code to use inline assembly instead. As vec_vsx_ld
is still used with 'unsigned int *' pointers, add new check for this in
configure.ac.
GnuPG-bug-id: 4906
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/sha512-ppc.c')
-rw-r--r-- | cipher/sha512-ppc.c | 72 |
1 file changed, 60 insertions, 12 deletions
diff --git a/cipher/sha512-ppc.c b/cipher/sha512-ppc.c index a758e1ea..31ea25bf 100644 --- a/cipher/sha512-ppc.c +++ b/cipher/sha512-ppc.c @@ -115,14 +115,62 @@ vec_merge_idx0_elems(vector2x_u64 v0, vector2x_u64 v1) static ASM_FUNC_ATTR_INLINE vector2x_u64 vec_vshasigma_u64(vector2x_u64 v, unsigned int a, unsigned int b) { - asm ("vshasigmad %0,%1,%2,%3" - : "=v" (v) - : "v" (v), "g" (a), "g" (b) - : "memory"); + __asm__ ("vshasigmad %0,%1,%2,%3" + : "=v" (v) + : "v" (v), "g" (a), "g" (b) + : "memory"); return v; } +static ASM_FUNC_ATTR_INLINE vector2x_u64 +vec_u64_load(unsigned long offset, const void *ptr) +{ + vector2x_u64 vecu64; +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ ("lxvd2x %x0,0,%1\n\t" + : "=wa" (vecu64) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ ("lxvd2x %x0,%1,%2\n\t" + : "=wa" (vecu64) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +#ifndef WORDS_BIGENDIAN + __asm__ ("xxswapd %x0, %x1" + : "=wa" (vecu64) + : "wa" (vecu64)); +#endif + return vecu64; +} + + +static ASM_FUNC_ATTR_INLINE void +vec_u64_store(vector2x_u64 vecu64, unsigned long offset, void *ptr) +{ +#ifndef WORDS_BIGENDIAN + __asm__ ("xxswapd %x0, %x1" + : "=wa" (vecu64) + : "wa" (vecu64)); +#endif +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ ("stxvd2x %x0,0,%1\n\t" + : + : "wa" (vecu64), "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ ("stxvd2x %x0,%1,%2\n\t" + : + : "wa" (vecu64), "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); +} + + /* SHA2 round in vector registers */ #define R(a,b,c,d,e,f,g,h,k,w) do \ { \ @@ -168,13 +216,13 @@ _gcry_sha512_transform_ppc8(u64 state[8], vector2x_u64 a, b, c, d, e, f, g, h, t1, t2; u64 w[16]; - h0 = vec_vsx_ld (8 * 0, (unsigned long long *)state); + h0 = vec_u64_load (8 * 0, (unsigned long long *)state); h1 = vec_rol_elems (h0, 1); - h2 = vec_vsx_ld (8 * 2, (unsigned long long *)state); + h2 = vec_u64_load (8 * 2, (unsigned long long *)state); h3 = vec_rol_elems (h2, 1); - h4 = vec_vsx_ld (8 * 4, (unsigned long long *)state); + h4 = vec_u64_load (8 * 4, (unsigned long long *)state); h5 = vec_rol_elems (h4, 1); - h6 = vec_vsx_ld (8 * 6, (unsigned long long *)state); + h6 = vec_u64_load (8 * 6, (unsigned long long *)state); h7 = vec_rol_elems (h6, 1); while (nblks >= 2) @@ -514,10 +562,10 @@ _gcry_sha512_transform_ppc8(u64 state[8], h2 = vec_merge_idx0_elems (h2, h3); h4 = vec_merge_idx0_elems (h4, h5); h6 = vec_merge_idx0_elems (h6, h7); - vec_vsx_st (h0, 8 * 0, (unsigned long long *)state); - vec_vsx_st (h2, 8 * 2, (unsigned long long *)state); - vec_vsx_st (h4, 8 * 4, (unsigned long long *)state); - vec_vsx_st (h6, 8 * 6, (unsigned long long *)state); + vec_u64_store (h0, 8 * 0, (unsigned long long *)state); + vec_u64_store (h2, 8 * 2, (unsigned long long *)state); + vec_u64_store (h4, 8 * 4, (unsigned long long *)state); + vec_u64_store (h6, 8 * 6, (unsigned long long *)state); return sizeof(w); } |