summaryrefslogtreecommitdiff
path: root/cipher/cipher-gcm.c
diff options
context:
space:
mode:
authorJussi Kivilinna <jussi.kivilinna@iki.fi>2019-04-27 22:03:31 +0300
committerJussi Kivilinna <jussi.kivilinna@iki.fi>2019-04-27 22:03:31 +0300
commitecd02cdd61e8c690f48637656f0e1e08b750fe30 (patch)
treeba9a35eb7281169118e63abf0c884882dddbfc30 /cipher/cipher-gcm.c
parentaf5f3fb08674608acf6617ea622ed0b9a2ee77a5 (diff)
downloadlibgcrypt-ecd02cdd61e8c690f48637656f0e1e08b750fe30.tar.gz
Optimizations for generic table-based GCM implementations
* cipher/cipher-gcm.c [GCM_TABLES_USE_U64] (do_fillM): Precalculate M[32..63] values. [GCM_TABLES_USE_U64] (do_ghash): Split processing of two 64-bit halfs of the input to two separate loops; Use precalculated M[] values. [GCM_USE_TABLES && !GCM_TABLES_USE_U64] (do_fillM): Precalculate M[64..127] values. [GCM_USE_TABLES && !GCM_TABLES_USE_U64] (do_ghash): Use precalculated M[] values. [GCM_USE_TABLES] (bshift): Avoid conditional execution for mask calculation. * cipher/cipher-internal.h (gcry_cipher_handle): Double gcm_table size. -- Benchmark on Intel Haswell (amd64, --disable-hwf all): Before: | nanosecs/byte mebibytes/sec cycles/byte auto Mhz GMAC_AES | 2.79 ns/B 341.3 MiB/s 11.17 c/B 3998 After (~36% faster): | nanosecs/byte mebibytes/sec cycles/byte auto Mhz GMAC_AES | 2.05 ns/B 464.7 MiB/s 8.20 c/B 3998 Benchmark on Intel Haswell (win32, --disable-hwf all): Before: | nanosecs/byte mebibytes/sec cycles/byte auto Mhz GMAC_AES | 4.90 ns/B 194.8 MiB/s 19.57 c/B 3997 After (~36% faster): | nanosecs/byte mebibytes/sec cycles/byte auto Mhz GMAC_AES | 3.58 ns/B 266.4 MiB/s 14.31 c/B 3999 Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/cipher-gcm.c')
-rw-r--r--cipher/cipher-gcm.c84
1 files changed, 58 insertions, 26 deletions
diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c
index cbda87be..c19f09f2 100644
--- a/cipher/cipher-gcm.c
+++ b/cipher/cipher-gcm.c
@@ -1,6 +1,6 @@
/* cipher-gcm.c - Generic Galois Counter Mode implementation
* Copyright (C) 2013 Dmitry Eremin-Solenikov
- * Copyright (C) 2013, 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2013, 2018-2019 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
@@ -126,7 +126,7 @@ bshift (u64 * b0, u64 * b1)
t[0] = *b0;
t[1] = *b1;
- mask = t[1] & 1 ? 0xe1 : 0;
+ mask = -(t[1] & 1) & 0xe1;
mask <<= 56;
*b1 = (t[1] >> 1) ^ (t[0] << 63);
@@ -158,6 +158,12 @@ do_fillM (unsigned char *h, u64 *M)
M[(i + j) + 0] = M[i + 0] ^ M[j + 0];
M[(i + j) + 16] = M[i + 16] ^ M[j + 16];
}
+
+ for (i = 0; i < 16; i++)
+ {
+ M[i + 32] = (M[i + 0] >> 4) ^ ((u64) gcmR[(M[i + 16] & 0xf) << 4] << 48);
+ M[i + 48] = (M[i + 16] >> 4) ^ (M[i + 0] << 60);
+ }
}
static inline unsigned int
@@ -175,20 +181,18 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM)
V[1] = be_bswap64 (V[1]);
/* First round can be manually tweaked based on fact that 'tmp' is zero. */
- i = 15;
-
- M = &gcmM[(V[1] & 0xf)];
+ M = &gcmM[(V[1] & 0xf) + 32];
V[1] >>= 4;
- tmp[0] = (M[0] >> 4) ^ ((u64) gcmR[(M[16] & 0xf) << 4] << 48);
- tmp[1] = (M[16] >> 4) ^ (M[0] << 60);
+ tmp[0] = M[0];
+ tmp[1] = M[16];
tmp[0] ^= gcmM[(V[1] & 0xf) + 0];
tmp[1] ^= gcmM[(V[1] & 0xf) + 16];
V[1] >>= 4;
- --i;
+ i = 6;
while (1)
{
- M = &gcmM[(V[1] & 0xf)];
+ M = &gcmM[(V[1] & 0xf) + 32];
V[1] >>= 4;
A = tmp[1] & 0xff;
@@ -196,15 +200,34 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM)
tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[1] & 0xf) + 0];
tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[1] & 0xf) + 16];
- tmp[0] ^= (M[0] >> 4) ^ ((u64) gcmR[(M[16] & 0xf) << 4] << 48);
- tmp[1] ^= (M[16] >> 4) ^ (M[0] << 60);
+ tmp[0] ^= M[0];
+ tmp[1] ^= M[16];
+
+ if (i == 0)
+ break;
+
+ V[1] >>= 4;
+ --i;
+ }
+
+ i = 7;
+ while (1)
+ {
+ M = &gcmM[(V[0] & 0xf) + 32];
+ V[0] >>= 4;
+
+ A = tmp[1] & 0xff;
+ T = tmp[0];
+ tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[0] & 0xf) + 0];
+ tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[0] & 0xf) + 16];
+
+ tmp[0] ^= M[0];
+ tmp[1] ^= M[16];
if (i == 0)
break;
- else if (i == 8)
- V[1] = V[0];
- else
- V[1] >>= 4;
+
+ V[0] >>= 4;
--i;
}
@@ -226,7 +249,7 @@ bshift (u32 * M, int i)
t[1] = M[i * 4 + 1];
t[2] = M[i * 4 + 2];
t[3] = M[i * 4 + 3];
- mask = t[3] & 1 ? 0xe1 : 0;
+ mask = -(t[3] & 1) & 0xe1;
M[i * 4 + 3] = (t[3] >> 1) ^ (t[2] << 31);
M[i * 4 + 2] = (t[2] >> 1) ^ (t[1] << 31);
@@ -267,6 +290,15 @@ do_fillM (unsigned char *h, u32 *M)
M[(i + j) * 4 + 2] = M[i * 4 + 2] ^ M[j * 4 + 2];
M[(i + j) * 4 + 3] = M[i * 4 + 3] ^ M[j * 4 + 3];
}
+
+ for (i = 0; i < 4 * 16; i += 4)
+ {
+ M[i + 0 + 64] = (M[i + 0] >> 4)
+ ^ ((u64) gcmR[(M[i + 3] << 4) & 0xf0] << 16);
+ M[i + 1 + 64] = (M[i + 1] >> 4) ^ (M[i + 0] << 28);
+ M[i + 2 + 64] = (M[i + 2] >> 4) ^ (M[i + 1] << 28);
+ M[i + 3 + 64] = (M[i + 3] >> 4) ^ (M[i + 2] << 28);
+ }
}
static inline unsigned int
@@ -285,19 +317,19 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM)
i = 15;
v = V[i];
- M = &gcmM[(v & 0xf) * 4];
+ M = &gcmM[(v & 0xf) * 4 + 64];
v = (v & 0xf0) >> 4;
m = &gcmM[v * 4];
v = V[--i];
- tmp[0] = (M[0] >> 4) ^ ((u64) gcmR[(M[3] << 4) & 0xf0] << 16) ^ m[0];
- tmp[1] = (M[1] >> 4) ^ (M[0] << 28) ^ m[1];
- tmp[2] = (M[2] >> 4) ^ (M[1] << 28) ^ m[2];
- tmp[3] = (M[3] >> 4) ^ (M[2] << 28) ^ m[3];
+ tmp[0] = M[0] ^ m[0];
+ tmp[1] = M[1] ^ m[1];
+ tmp[2] = M[2] ^ m[2];
+ tmp[3] = M[3] ^ m[3];
while (1)
{
- M = &gcmM[(v & 0xf) * 4];
+ M = &gcmM[(v & 0xf) * 4 + 64];
v = (v & 0xf0) >> 4;
m = &gcmM[v * 4];
@@ -309,10 +341,10 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM)
tmp[2] = (T[1] << 24) ^ (tmp[2] >> 8) ^ m[2];
tmp[3] = (T[2] << 24) ^ (tmp[3] >> 8) ^ m[3];
- tmp[0] ^= (M[0] >> 4) ^ ((u64) gcmR[(M[3] << 4) & 0xf0] << 16);
- tmp[1] ^= (M[1] >> 4) ^ (M[0] << 28);
- tmp[2] ^= (M[2] >> 4) ^ (M[1] << 28);
- tmp[3] ^= (M[3] >> 4) ^ (M[2] << 28);
+ tmp[0] ^= M[0];
+ tmp[1] ^= M[1];
+ tmp[2] ^= M[2];
+ tmp[3] ^= M[3];
if (i == 0)
break;