author     Jussi Kivilinna <jussi.kivilinna@iki.fi>  2020-02-02 19:52:08 +0200
committer  Jussi Kivilinna <jussi.kivilinna@iki.fi>  2020-02-02 19:52:08 +0200
commit     b64b029318e7d0b66123015146614118f466a7a9 (patch)
tree       974284d2766bdbb978b4d888946633b1f7478c4d
parent     89776d45c824032409f581e5fd1db6bf149df57f (diff)
download   libgcrypt-b64b029318e7d0b66123015146614118f466a7a9.tar.gz
crc-ppc: fix bad register used for vector load/store assembly
* cipher/crc-ppc.c (CRC_VEC_U64_LOAD_BE): Move implementation to...
(asm_vec_u64_load_be): ...here; Add "r0" to clobber list for load
instruction when offset is not zero; Add zero offset path.
--

Register r0 must not be used for RA input for vector load/store
instructions as r0 is not read as register but as value '0'.

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r--  cipher/crc-ppc.c  41
1 file changed, 28 insertions(+), 13 deletions(-)
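Background note (context for readers, not part of the commit): POWER
indexed vector load/store instructions such as lxvd2x form the
effective address as (RA|0) + RB, so when the RA field names r0 the
hardware substitutes the literal value 0 instead of reading r0's
contents. If the compiler happens to allocate r0 for the offset input
of the inline assembly, a non-zero offset is silently ignored. Below
is a minimal sketch of the fixed constraint pattern, assuming GCC
extended asm on a powerpc64 target with VSX enabled; the function and
operand names are illustrative only, not taken from libgcrypt:

    #include <stdint.h>

    /* Sketch only: load 16 bytes from ptr+offs into vs32.  Without the
     * "r0" clobber below, a plain "r" constraint could hand r0 to the
     * offs operand, and lxvd2x would then read the RA field as the
     * constant 0, silently dropping the offset.  Listing "r0" as
     * clobbered steers the register allocator away from it.  */
    static inline void
    load_indexed (unsigned int offs, const void *ptr)
    {
      __asm__ ("lxvd2x %%vs32,%0,%1"   /* EA = (RA|0) + RB */
               :                       /* no C-level outputs; result in vs32 */
               : "r" (offs), "r" ((uintptr_t) ptr)
               : "memory", "r0", "v0");
    }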
diff --git a/cipher/crc-ppc.c b/cipher/crc-ppc.c
index 7dda90c5..4d7f0add 100644
--- a/cipher/crc-ppc.c
+++ b/cipher/crc-ppc.c
@@ -1,5 +1,5 @@
/* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation
- * Copyright (C) 2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
@@ -168,22 +168,37 @@ static const vector16x_u8 bswap_const ALIGNED_64 =
# define CRC_VEC_U64_LOAD(offs, ptr) \
vec_vsx_ld((offs), (const unsigned long long *)(ptr))
# define CRC_VEC_U64_LOAD_LE(offs, ptr) CRC_VEC_U64_LOAD((offs), (ptr))
-# define CRC_VEC_U64_LOAD_BE(offs, ptr) \
-    ({ \
-      vector2x_u64 __vecu64; \
-      __asm__ ("lxvd2x %%vs32,%1,%2\n\t" \
-               "vperm %0,%%v0,%%v0,%3\n\t" \
-               : "=v" (__vecu64) \
-               : "r" (offs), "r" ((uintptr_t)(ptr)), \
-                 "v" (vec_load_le_const) \
-               : "memory", "v0"); \
-      __vecu64; })
+# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr)
# define CRC_VEC_SWAP_TO_LE(v) (v)
# define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v)
# define VEC_U64_LO 0
# define VEC_U64_HI 1
-static const vector16x_u8 vec_load_le_const =
- { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
+
+static ASM_FUNC_ATTR_INLINE vector2x_u64
+asm_vec_u64_load_be(unsigned int offset, const void *ptr)
+{
+  static const vector16x_u8 vec_load_le_const =
+    { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 };
+  vector2x_u64 vecu64;
+
+#if __GNUC__ >= 4
+  if (__builtin_constant_p (offset) && offset == 0)
+    __asm__ ("lxvd2x %%vs32,0,%1\n\t"
+             "vperm %0,%%v0,%%v0,%2\n\t"
+             : "=v" (vecu64)
+             : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const)
+             : "memory", "v0");
+  else
+#endif
+    __asm__ ("lxvd2x %%vs32,%1,%2\n\t"
+             "vperm %0,%%v0,%%v0,%3\n\t"
+             : "=v" (vecu64)
+             : "r" (offset), "r" ((uintptr_t)(ptr)),
+               "v" (vec_load_le_const)
+             : "memory", "r0", "v0");
+
+  return vecu64;
+}
#endif
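For orientation, a hedged usage sketch of the two new paths
(hypothetical caller, not from the repository; it assumes crc-ppc.c's
vector2x_u64 typedef, a 128-bit vector of two unsigned long long, and
data names invented here):

    static const unsigned char data[32] = { 0 };   /* illustrative input */

    void example (void)
    {
      /* Constant zero offset: after inlining, __builtin_constant_p
       * selects the "lxvd2x %vs32,0,RB" form, which encodes RA as 0 by
       * design and needs no scratch GPR.  */
      vector2x_u64 a = CRC_VEC_U64_LOAD_BE (0, data);

      /* Non-zero offset: takes the indexed form, where the "r0"
       * clobber keeps the allocator from assigning r0 to the offset
       * operand.  */
      vector2x_u64 b = CRC_VEC_U64_LOAD_BE (16, data);

      (void) a; (void) b;
    }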