diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2020-02-02 19:52:08 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2020-02-02 19:52:08 +0200 |
commit | b64b029318e7d0b66123015146614118f466a7a9 (patch) | |
tree | 974284d2766bdbb978b4d888946633b1f7478c4d | |
parent | 89776d45c824032409f581e5fd1db6bf149df57f (diff) | |
download | libgcrypt-b64b029318e7d0b66123015146614118f466a7a9.tar.gz |
crc-ppc: fix bad register used for vector load/store assembly
* cipher/crc-ppc.c (CRC_VEC_U64_LOAD_BE): Move implementation to...
(asm_vec_u64_load_be): ...here; Add "r0" to clobber list for load
instruction when offset is not zero; Add zero offset path.
--
Register r0 must not be used as the RA input for vector load/store
instructions, as r0 is not read as a register but as the value '0'.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | cipher/crc-ppc.c | 41 |
1 files changed, 28 insertions, 13 deletions
diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c index 7dda90c5..4d7f0add 100644 --- a/cipher/crc-ppc.c +++ b/cipher/crc-ppc.c @@ -1,5 +1,5 @@ /* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation - * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. * @@ -168,22 +168,37 @@ static const vector16x_u8 bswap_const ALIGNED_64 = # define CRC_VEC_U64_LOAD(offs, ptr) \ vec_vsx_ld((offs), (const unsigned long long *)(ptr)) # define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr)) -# define CRC_VEC_U64_LOAD_BE(offs, ptr) \ - ({ \ - vector2x_u64 __vecu64; \ - __asm__ ("lxvd2x %%vs32,%1,%2\n\t" \ - "vperm %0,%%v0,%%v0,%3\n\t" \ - : "=v" (__vecu64) \ - : "r" (offs), "r" ((uintptr_t)(ptr)), \ - "v" (vec_load_le_const) \ - : "memory", "v0"); \ - __vecu64; }) +# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr) # define CRC_VEC_SWAP_TO_LE(v) (v) # define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v) # define VEC_U64_LO 0 # define VEC_U64_HI 1 -static const vector16x_u8 vec_load_le_const = - { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 }; + +static ASM_FUNC_ATTR_INLINE vector2x_u64 +asm_vec_u64_load_be(unsigned int offset, const void *ptr) +{ + static const vector16x_u8 vec_load_le_const = + { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 }; + vector2x_u64 vecu64; + +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ ("lxvd2x %%vs32,0,%1\n\t" + "vperm %0,%%v0,%%v0,%2\n\t" + : "=v" (vecu64) + : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const) + : "memory", "v0"); + else +#endif + __asm__ ("lxvd2x %%vs32,%1,%2\n\t" + "vperm %0,%%v0,%%v0,%3\n\t" + : "=v" (vecu64) + : "r" (offset), "r" ((uintptr_t)(ptr)), + "v" (vec_load_le_const) + : "memory", "r0", "v0"); + + return vecu64; +} #endif |