From 965871a72773457d73fda6a1a2970a4279dcbe6f Mon Sep 17 00:00:00 2001 From: Legrandin Date: Sat, 29 Jun 2013 18:35:49 +0200 Subject: GCM mode: Optimize key setup for GCM mode. GCM mode requires GHASH for 2 different operations: one for the data (AD + ciphertext) and one for the IV. Construction of tables to speed-up GHASH is very expensive and it is worth doing only for the data, not for the IV. This patch ensures that the GHASH for the IV does not use tables, with a ~40% faster key setup. [dlitz@dlitz.net: Whitespace fixed with "git rebase --whitespace=fix"] --- lib/Crypto/Cipher/blockalgo.py | 13 +++--- src/galois.c | 95 +++++++++++++++++++++++++++++++++++------- 2 files changed, 87 insertions(+), 21 deletions(-) diff --git a/lib/Crypto/Cipher/blockalgo.py b/lib/Crypto/Cipher/blockalgo.py index 9ac8710..0d21f44 100644 --- a/lib/Crypto/Cipher/blockalgo.py +++ b/lib/Crypto/Cipher/blockalgo.py @@ -329,14 +329,17 @@ class _GHASH(_SmoothMAC): (x^128 + x^7 + x^2 + x + 1). """ - def __init__(self, hash_subkey, block_size): + def __init__(self, hash_subkey, block_size, table_size='64K'): _SmoothMAC.__init__(self, block_size, None, 0) - self._hash_subkey = galois._ghash_expand(hash_subkey) + if table_size == '64K': + self._hash_subkey = galois._ghash_expand(hash_subkey) + else: + self._hash_subkey = hash_subkey self._last_y = bchr(0) * 16 self._mac = galois._ghash def copy(self): - clone = _GHASH(self._hash_subkey, self._bs) + clone = _GHASH(self._hash_subkey, self._bs, table_size='0K') _SmoothMAC._deep_copy(self, clone) clone._last_y = self._last_y return clone @@ -433,7 +436,7 @@ class BlockAlgo: bchr(0) * fill + long_to_bytes(8 * len(self.nonce), 8)) - mac = _GHASH(hash_subkey, factory.block_size) + mac = _GHASH(hash_subkey, factory.block_size, '0K') mac.update(ghash_in) self._j0 = bytes_to_long(mac.digest()) @@ -443,7 +446,7 @@ class BlockAlgo: self._cipher = self._factory.new(key, MODE_CTR, counter=ctr) # Step 5 - Bootstrat GHASH - self._cipherMAC = 
_GHASH(hash_subkey, factory.block_size) + self._cipherMAC = _GHASH(hash_subkey, factory.block_size, '64K') # Step 6 - Prepare GCTR cipher for GMAC ctr = Counter.new(128, initial_value=self._j0, allow_wraparound=True) diff --git a/src/galois.c b/src/galois.c index 3c76c99..2660044 100644 --- a/src/galois.c +++ b/src/galois.c @@ -92,6 +92,42 @@ static const t_v_tables* make_v_tables(const uint8_t y[16]) return (const t_v_tables*)tables; } +/** + * Multiply two elements of GF(2**128) using the reducing polynomial + * (x^128 + x^7 + x^2 + x + 1). + */ +static void gcm_mult(uint8_t out[16], const uint8_t x[16], const uint8_t y[16]) +{ + uint64_t z[2], v[2]; + int i; + + /** z, v = 0, y **/ + z[0] = z[1] = 0; + v[0] = be_to_word(&y[0]); + v[1] = be_to_word(&y[8]); + + for (i=0; i<16; i++) { + uint8_t j; + + for (j=0x80; j>0; j>>=1) { + uint64_t c; + + /** z ^= (x>>i&1)*v **/ + if (x[i] & j) { + + z[0] ^= v[0]; + z[1] ^= v[1]; + } + /** v = (v&1)*0xE1000000000000000000000000000000L ^ (v>>1) **/ + c = v[1]&1 ? 0xE100000000000000 : 0; + v[1] = v[1]>>1 | (v[0] << 63); + v[0] = v[0]>>1 ^ c; + } + } + word_to_be(out, z[0]); + word_to_be(out+8, z[1]); +} + /** * Multiply two elements of GF(2**128) using the reducing polynomial * (x^128 + x^7 + x^2 + x + 1). @@ -177,31 +213,58 @@ static int ghash_expand(t_key_tables *key_tables, const uint8_t h[16]) * Compute the GHASH of a piece of an arbitrary data given an * arbitrary Y_0, as specified in NIST SP 800 38D. * - * \param y_out The resulting GHASH (16 bytes). - * \param block_data Pointer to the data to hash. - * \param len Length of the data to hash (multiple of 16). - * \param y_in The initial Y (Y_0, 16 bytes). - * \param key_tables The expanded hash key (16*256*16 bytes). + * \param y_out The resulting GHASH (16 bytes). + * \param block_data Pointer to the data to hash. + * \param len Length of the data to hash (multiple of 16). + * \param y_in The initial Y (Y_0, 16 bytes).
+ * \param key_tables The hash key, possibly expanded to 16*256*16 bytes. + * \param key_tables_len The length of the data pointed to by key_tables. */ static void ghash( uint8_t y_out[16], const uint8_t block_data[], int len, const uint8_t y_in[16], - const t_key_tables *key_tables + const void *key_tables, + int key_tables_len ) { - int i; + int i, j; + uint8_t x[16]; + const t_key_tables *key_tables_64 = NULL; + const uint8_t (*key)[16] = NULL; + + switch (key_tables_len) { + case sizeof(t_key_tables): + { + key_tables_64 = (const t_key_tables*) key_tables; + break; + } + case 16: + { + key = (const uint8_t (*)[16]) key_tables; + break; + } + default: + return; + } memcpy(y_out, y_in, 16); - for (i=0; i