summaryrefslogtreecommitdiff
path: root/cipher/sha512.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2016-02-08 20:13:38 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2016-02-08 20:13:38 +0200
commit: 8353884bc65c820d5bcacaf1ac23cdee72091a09 (patch)
tree: e203452ffb6274d0b9d7515268fb0e490dd371c2 /cipher/sha512.c
parent: b8b3361504950689ef1e779fb3357cecf8a9f739 (diff)
download: libgcrypt-8353884bc65c820d5bcacaf1ac23cdee72091a09.tar.gz
Add ARM assembly implementation of SHA-512

* cipher/Makefile.am: Add 'sha512-arm.S'.
* cipher/sha512-arm.S: New.
* cipher/sha512.c (USE_ARM_ASM): New.
(_gcry_sha512_transform_arm): New.
(transform) [USE_ARM_ASM]: Use ARM assembly implementation instead of
generic.
* configure.ac: Add 'sha512-arm.lo'.
--

Benchmark on Cortex-A8 (armv6, 1008 MHz):

Before:
          |  nanosecs/byte   mebibytes/sec   cycles/byte
 SHA512   |     112.0 ns/B      8.52 MiB/s     112.9 c/B

After (3.3x faster):
          |  nanosecs/byte   mebibytes/sec   cycles/byte
 SHA512   |     34.01 ns/B     28.04 MiB/s     34.28 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>

Diffstat (limited to 'cipher/sha512.c')
-rw-r--r-- cipher/sha512.c | 82
1 files changed, 50 insertions, 32 deletions
diff --git a/cipher/sha512.c b/cipher/sha512.c
index 1196db93..5b259650 100644
--- a/cipher/sha512.c
+++ b/cipher/sha512.c
@@ -66,6 +66,13 @@
#endif /*ENABLE_NEON_SUPPORT*/
+/* USE_ARM_ASM indicates whether to enable ARM assembly code. */
+#undef USE_ARM_ASM
+#if defined(__ARMEL__) && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS)
+# define USE_ARM_ASM 1
+#endif
+
+
/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */
#undef USE_SSSE3
#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
@@ -204,36 +211,6 @@ sha384_init (void *context, unsigned int flags)
}
-static inline u64
-ROTR (u64 x, u64 n)
-{
- return ((x >> n) | (x << (64 - n)));
-}
-
-static inline u64
-Ch (u64 x, u64 y, u64 z)
-{
- return ((x & y) ^ ( ~x & z));
-}
-
-static inline u64
-Maj (u64 x, u64 y, u64 z)
-{
- return ((x & y) ^ (x & z) ^ (y & z));
-}
-
-static inline u64
-Sum0 (u64 x)
-{
- return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
-}
-
-static inline u64
-Sum1 (u64 x)
-{
- return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
-}
-
static const u64 k[] =
{
U64_C(0x428a2f98d728ae22), U64_C(0x7137449123ef65cd),
@@ -278,6 +255,38 @@ static const u64 k[] =
U64_C(0x5fcb6fab3ad6faec), U64_C(0x6c44198c4a475817)
};
+#ifndef USE_ARM_ASM
+
+static inline u64
+ROTR (u64 x, u64 n)
+{
+ return ((x >> n) | (x << (64 - n)));
+}
+
+static inline u64
+Ch (u64 x, u64 y, u64 z)
+{
+ return ((x & y) ^ ( ~x & z));
+}
+
+static inline u64
+Maj (u64 x, u64 y, u64 z)
+{
+ return ((x & y) ^ (x & z) ^ (y & z));
+}
+
+static inline u64
+Sum0 (u64 x)
+{
+ return (ROTR (x, 28) ^ ROTR (x, 34) ^ ROTR (x, 39));
+}
+
+static inline u64
+Sum1 (u64 x)
+{
+ return (ROTR (x, 14) ^ ROTR (x, 18) ^ ROTR (x, 41));
+}
+
/****************
* Transform the message W which consists of 16 64-bit-words
*/
@@ -304,7 +313,6 @@ transform_blk (SHA512_STATE *hd, const unsigned char *data)
#define S0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7))
#define S1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
-
for (t = 0; t < 80 - 16; )
{
u64 t1, t2;
@@ -545,7 +553,7 @@ transform_blk (SHA512_STATE *hd, const unsigned char *data)
return /* burn_stack */ (8 + 16) * sizeof(u64) + sizeof(u32) +
3 * sizeof(void*);
}
-
+#endif /*!USE_ARM_ASM*/
/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
* stack to store XMM6-XMM15 needed on Win64. */
@@ -568,6 +576,12 @@ void _gcry_sha512_transform_armv7_neon (SHA512_STATE *hd,
const u64 k[], size_t num_blks);
#endif
+#ifdef USE_ARM_ASM
+unsigned int _gcry_sha512_transform_arm (SHA512_STATE *hd,
+ const unsigned char *data,
+ const u64 k[], size_t num_blks);
+#endif
+
#ifdef USE_SSSE3
unsigned int _gcry_sha512_transform_amd64_ssse3(const void *input_data,
void *state,
@@ -622,6 +636,9 @@ transform (void *context, const unsigned char *data, size_t nblks)
}
#endif
+#ifdef USE_ARM_ASM
+ burn = _gcry_sha512_transform_arm (&ctx->state, data, k, nblks);
+#else
do
{
burn = transform_blk (&ctx->state, data) + 3 * sizeof(void*);
@@ -636,6 +653,7 @@ transform (void *context, const unsigned char *data, size_t nblks)
*/
burn += ASM_EXTRA_STACK;
#endif
+#endif
return burn;
}