summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLegrandin <helderijs@gmail.com>2014-03-23 18:46:55 +0100
committerDwayne Litzenberger <dlitz@dlitz.net>2014-06-22 23:38:31 -0700
commit947b554d85012cf35185ded38ef3484de010d2cf (patch)
tree8f1d16cc5a980d59dd7fadb70c1886009219ce5a
parent0782d68840d0ebf850516e606e398b8a5396eb64 (diff)
downloadpycrypto-947b554d85012cf35185ded38ef3484de010d2cf.tar.gz
Make GHASH more robust against timing attacks.
In order to speed up as much as possible the GHASH, the current implementation expands the 16 byte hash key (H) into a table of 64 KBytes. However, that is sensitive to cache-based timing attacks. If we assume that access to data inside the same cache line is constant-time (likely), fitting a table item into a cache line may help against the attacks. This patch reduce the pre-computed table from 64K to 4K and aligns every item to a 32 byte boundary (since most modern CPUs have cache line of that size or larger). This patch will reduce the overall performance. This patch also reverts commit 965871a727 ("GCM mode: Optimize key setup for GCM mode") since I actually got conflicting benchmark results.
-rw-r--r--configure.ac1
-rw-r--r--lib/Crypto/Cipher/blockalgo.py13
-rw-r--r--src/config.h.in7
-rw-r--r--src/galois.c279
4 files changed, 105 insertions, 195 deletions
diff --git a/configure.ac b/configure.ac
index 5f22d00..3bba18d 100644
--- a/configure.ac
+++ b/configure.ac
@@ -99,6 +99,7 @@ AC_TYPE_UINT16_T
AC_TYPE_UINT32_T
AC_TYPE_UINT64_T
AC_TYPE_UINT8_T
+AC_TYPE_UINTPTR_T
# Checks for library functions.
AC_FUNC_MALLOC
diff --git a/lib/Crypto/Cipher/blockalgo.py b/lib/Crypto/Cipher/blockalgo.py
index 84b9bc3..db45404 100644
--- a/lib/Crypto/Cipher/blockalgo.py
+++ b/lib/Crypto/Cipher/blockalgo.py
@@ -329,17 +329,14 @@ class _GHASH(_SmoothMAC):
(x^128 + x^7 + x^2 + x + 1).
"""
- def __init__(self, hash_subkey, block_size, table_size='64K'):
+ def __init__(self, hash_subkey, block_size):
_SmoothMAC.__init__(self, block_size, None, 0)
- if table_size == '64K':
- self._hash_subkey = galois._ghash_expand(hash_subkey)
- else:
- self._hash_subkey = hash_subkey
+ self._hash_subkey = galois._ghash_expand(hash_subkey)
self._last_y = bchr(0) * 16
self._mac = galois._ghash
def copy(self):
- clone = _GHASH(self._hash_subkey, self._bs, table_size='0K')
+ clone = _GHASH(self._hash_subkey, self._bs)
_SmoothMAC._deep_copy(self, clone)
clone._last_y = self._last_y
return clone
@@ -436,7 +433,7 @@ class BlockAlgo:
bchr(0) * fill +
long_to_bytes(8 * len(self.nonce), 8))
- mac = _GHASH(hash_subkey, factory.block_size, '0K')
+ mac = _GHASH(hash_subkey, factory.block_size)
mac.update(ghash_in)
self._j0 = bytes_to_long(mac.digest())
@@ -446,7 +443,7 @@ class BlockAlgo:
self._cipher = self._factory.new(key, MODE_CTR, counter=ctr)
# Step 5 - Bootstrat GHASH
- self._cipherMAC = _GHASH(hash_subkey, factory.block_size, '64K')
+ self._cipherMAC = _GHASH(hash_subkey, factory.block_size)
# Step 6 - Prepare GCTR cipher for GMAC
ctr = Counter.new(128, initial_value=self._j0, allow_wraparound=True)
diff --git a/src/config.h.in b/src/config.h.in
index 36278c1..f18fcb0 100644
--- a/src/config.h.in
+++ b/src/config.h.in
@@ -69,6 +69,9 @@
/* Define to 1 if you have the <sys/types.h> header file. */
#undef HAVE_SYS_TYPES_H
+/* Define to 1 if the system has the type `uintptr_t'. */
+#undef HAVE_UINTPTR_T
+
/* Define to 1 if you have the <unistd.h> header file. */
#undef HAVE_UNISTD_H
@@ -160,3 +163,7 @@
/* Define to the type of an unsigned integer type of width exactly 8 bits if
such a type exists and the standard includes do not define it. */
#undef uint8_t
+
+/* Define to the type of an unsigned integer type wide enough to hold a
+ pointer, if such a type exists, and if the system does not define it. */
+#undef uintptr_t
diff --git a/src/galois.c b/src/galois.c
index 2660044..0da79e6 100644
--- a/src/galois.c
+++ b/src/galois.c
@@ -25,10 +25,28 @@
#include <assert.h>
#include <string.h>
-/** Type for tables containing the expanded hash key **/
-typedef uint64_t t_key_tables[16][256][2];
+#define ALIGNMENT 32
-typedef uint64_t t_v_tables[128][2];
+/**
+ * A V table is a 4096 bytes table that will contain the expanded
+ * GHASH key (H). It is used to speed up the GF(128) multiplication Z = X*H.
+ *
+ * The table contains 128 entries, one for each bit of X.
+ * Each entry takes 32 bytes and can fit into the cache line of a modern
+ * processor. If we assume that access to memory mapped to the same
+ * cache line is somewhat constant, we can make GHASH robust again
+ * cache timing attacks.
+ */
+typedef uint64_t t_v_tables[128][2][2];
+
+/**
+ * To ensure that the V table is aligned to a 32-byte memory boundary,
+ * we allocate a larger piece of memory and carve the V table from there.
+ */
+typedef struct {
+ uint8_t buffer[sizeof(t_v_tables)+ALIGNMENT];
+ t_v_tables *v_tables;
+} t_exp_key;
/**
* Big Endian to word conversions
@@ -56,230 +74,121 @@ static void word_to_be(uint8_t fb[8], uint64_t w)
}
/**
- * Compute H*x^i for i=0..127. Store the results in a new
- * vector indexed by i.
+ * Create a V table. V[i] is the value H*x^i (i=0..127).
+ * \param h The 16 byte GHASH key
+ * \param tables A pointer to an allocated V table
*/
-static const t_v_tables* make_v_tables(const uint8_t y[16])
+static void make_v_tables(const uint8_t h[16], t_v_tables *tables)
{
- t_v_tables *tables;
- uint64_t *cur;
+ uint64_t (*cur)[2];
int i;
- tables = (t_v_tables*) calloc(128*2, sizeof(uint64_t));
- if (!tables) {
- return NULL;
- }
+ memset(tables, 0, sizeof(t_v_tables));
- cur = &((*tables)[0][0]);
+ cur = &((*tables)[0][1]);
- cur[0] = be_to_word(&y[0]);
- cur[1] = be_to_word(&y[8]);
+ (*cur)[0] = be_to_word(&h[0]);
+ (*cur)[1] = be_to_word(&h[8]);
for (i=1; i<128; i++) {
uint64_t c;
- uint64_t *next;
+ uint64_t (*next)[2];
- next = &((*tables)[i][0]);
+ next = &((*tables)[i][1]);
/** v = (v&1)*0xE1000000000000000000000000000000L ^ (v>>1) **/
- c = cur[1]&1 ? 0xE100000000000000 : 0;
- next[1] = cur[1]>>1 | cur[0]<<63;
- next[0] = cur[0]>>1 ^ c;
+ c = (*cur)[1]&1 ? 0xE100000000000000 : 0;
+ (*next)[1] = (*cur)[1]>>1 | (*cur)[0]<<63;
+ (*next)[0] = (*cur)[0]>>1 ^ c;
cur = next;
}
-
- return (const t_v_tables*)tables;
-}
-
-/**
- * Multiply to elements of GF(2**128) using the reducing polynomial
- * (x^128 + x^7 + x^2 + x + 1).
- */
-static void gcm_mult(uint8_t out[16], const uint8_t x[16], const uint8_t y[16])
-{
- uint64_t z[2], v[2];
- int i;
-
- /** z, v = 0, y **/
- z[0] = z[1] = 0;
- v[0] = be_to_word(&y[0]);
- v[1] = be_to_word(&y[8]);
-
- for (i=0; i<16; i++) {
- uint8_t j;
-
- for (j=0x80; j>0; j>>=1) {
- uint64_t c;
-
- /** z ^= (x>>i&1)*v **/
- if (x[i] & j) {
-
- z[0] ^= v[0];
- z[1] ^= v[1];
- }
- /** v = (v&1)*0xE1000000000000000000000000000000L ^ (v>>1) **/
- c = v[1]&1 ? 0xE100000000000000 : 0;
- v[1] = v[1]>>1 | (v[0] << 63);
- v[0] = v[0]>>1 ^ c;
- }
- }
- word_to_be(out, z[0]);
- word_to_be(out+8, z[1]);
}
/**
* Multiply two elements of GF(2**128) using the reducing polynomial
* (x^128 + x^7 + x^2 + x + 1).
*
- * The first element has been expanded into H tables.
+ * \param out The 16 byte buffer that will receive the result
+ * \param key_tables One factor, expanded into a V table
+ * \param x The other factor (16 bytes)
*/
-static void gcm_mult2(uint8_t out[16], const t_key_tables *key_tables, const uint8_t x[16])
+static void gcm_mult2(uint8_t out[16], const t_v_tables *key_tables, const uint8_t x[16])
{
- int i;
+ int i, bit_scan_128;
uint64_t z[2];
z[0] = z[1] = 0;
+ bit_scan_128 = 0;
for (i=0; i<16; i++) {
- z[0] ^= (*key_tables)[i][x[i]][0];
- z[1] ^= (*key_tables)[i][x[i]][1];
- }
- word_to_be(out, z[0]);
- word_to_be(out+8, z[1]);
-}
+ uint8_t xi;
+ int j;
-/**
- * Multiply two elements of GF(2**128) using the reducing polynomial
- * (x^128 + x^7 + x^2 + x + 1).
- *
- * In first element, only the byte at position 'pos' is non-zero at has
- * value 'x'.
- *
- * The second element, is expanded in V tables (128 elements, one per
- * each bit position).
- */
-static void gcm_mult3(uint64_t out[2], uint8_t x, uint8_t pos, const t_v_tables *v_tables)
-{
- uint64_t z[2];
- int j;
- const uint64_t (*v)[2];
+ xi = x[i];
+ for (j=0; j<8; j++) {
+ int bit;
- /** z, v = 0, y **/
- z[0] = z[1] = 0;
+ bit = xi>>7 & 1; /** Constant time */
+ z[0] ^= (*key_tables)[bit_scan_128][bit][0];
+ z[1] ^= (*key_tables)[bit_scan_128][bit][1];
- v = &((*v_tables)[pos*8]);
- for (j=0x80; j!=0; j>>=1, v++) {
- if (x & j) {
- z[0] ^= (*v)[0];
- z[1] ^= (*v)[1];
+ xi <<= 1;
+ bit_scan_128++;
}
}
- out[0] = z[0];
- out[1] = z[1];
+
+ word_to_be(out, z[0]);
+ word_to_be(out+8, z[1]);
}
-/**
- * Expand a hash key into a set of tables that will speed
- * up GHASH.
- *
- * \param tables Pointer to allocated memory that will hold
- * the tables.
- * \param h The hash key.
- */
-static int ghash_expand(t_key_tables *key_tables, const uint8_t h[16])
-{
- int i;
- const t_v_tables *v_tables;
-
- v_tables = make_v_tables(h);
- if (v_tables==NULL) {
- return -1;
- }
-
- for (i=0; i<16; i++) {
- int j;
-
- for (j=0; j<256; j++) {
- /** Z = H*j*P^{8i} **/
- gcm_mult3(&((*key_tables)[i][j][0]), j, i, v_tables);
- }
- }
-
- free(v_tables);
- return 0;
-}
/**
- * Compute the GHASH of a piece of an arbitrary data given an
- * arbitrary Y_0, as specified in NIST SP 800 38D.
+ * Compute the GHASH of a piece of data given an arbitrary Y_0,
+ * as specified in NIST SP 800 38D.
*
- * \param y_out The resulting GHASH (16 bytes).
- * \param block_data Pointer to the data to hash.
- * \param len Length of the data to hash (multiple of 16).
- * \param y_in The initial Y (Y_0, 16 bytes).
- * \param key_tables The hash key, possibly expanded to 16*256*16 bytes.
- * \param key_tables_len The length of the data pointed by key_table.
+ * \param y_out The resulting GHASH (16 bytes).
+ * \param block_data Pointer to the data to hash.
+ * \param len Length of the data to hash (multiple of 16).
+ * \param y_in The initial Y (Y_0, 16 bytes).
+ * \param key_tables The expanded hash key (16*256*16 bytes).
*/
static void ghash(
uint8_t y_out[16],
const uint8_t block_data[],
int len,
const uint8_t y_in[16],
- const void *key_tables,
- int key_tables_len
+ const t_v_tables *key_tables
)
{
- int i, j;
- uint8_t x[16];
- const t_key_tables *key_tables_64 = NULL;
- const uint8_t (*key)[16] = NULL;
-
- switch (key_tables_len) {
- case sizeof(t_key_tables):
- {
- key_tables_64 = (const t_key_tables*) key_tables;
- break;
- }
- case 16:
- {
- key = (const uint8_t (*)[16]) key_tables;
- break;
- }
- default:
- return;
- }
+ int i;
memcpy(y_out, y_in, 16);
+ for (i=0; i<len; i+=16) {
+ int j;
+ uint8_t x[16];
- if (key_tables_64) {
- for (i=0; i<len; i+=16) {
- for (j=0; j<16; j++) {
- x[j] = y_out[j] ^ block_data[i+j];
- }
- gcm_mult2(y_out, key_tables_64, x);
- }
- } else {
- for (i=0; i<len; i+=16) {
- for (j=0; j<16; j++) {
- x[j] = y_out[j] ^ block_data[i+j];
- }
- gcm_mult(y_out, *key, x);
+ for (j=0; j<16; j++) {
+ x[j] = y_out[j] ^ block_data[i+j];
}
+ gcm_mult2(y_out, key_tables, x);
}
}
static char ghash_expand__doc__[] =
"_ghash_expand(h:str) -> str\n"
"\n"
-"Return an expanded hash key for GHASH.\n";
+"Return an expanded GHASH key.\n";
+/**
+ * Expand the AES key into a Python (byte) string object.
+ */
static PyObject *
ghash_expand_function(PyObject *self, PyObject *args)
{
PyObject *h;
PyObject *retval = NULL;
Py_ssize_t len_h;
- int err;
+ t_exp_key *exp_key;
if (!PyArg_ParseTuple(args, "S", &h)) {
goto out;
@@ -292,50 +201,46 @@ ghash_expand_function(PyObject *self, PyObject *args)
goto out;
}
- /* Create return string */
- retval = PyBytes_FromStringAndSize(NULL, sizeof(t_key_tables));
+ retval = PyBytes_FromStringAndSize(NULL, sizeof(t_exp_key));
if (!retval) {
goto out;
}
- Py_BEGIN_ALLOW_THREADS;
-
- err = ghash_expand(
- (t_key_tables*)PyBytes_AS_STRING(retval),
- (uint8_t*)PyBytes_AS_STRING(h)
- );
+ exp_key = (t_exp_key*) PyBytes_AS_STRING(retval);
+ exp_key->v_tables = (t_v_tables*)
+ (((uintptr_t)exp_key->buffer & ~(ALIGNMENT-1)) + ALIGNMENT);
+ Py_BEGIN_ALLOW_THREADS;
+
+ make_v_tables((uint8_t*)PyBytes_AS_STRING(h), exp_key->v_tables);
+
Py_END_ALLOW_THREADS;
- if (err!=0) {
- Py_DECREF(retval);
- retval = NULL;
- }
-
out:
return retval;
}
static char ghash__doc__[] =
-"_ghash(data:str, y:str, exp_h:str) -> str\n"
+"_ghash(data:str, y:str, exp_key:str) -> str\n"
"\n"
"Return a GHASH.\n";
static PyObject *
ghash_function(PyObject *self, PyObject *args)
{
- PyObject *data, *y, *exp_h;
+ PyObject *data, *y, *exp_key;
PyObject *retval = NULL;
- Py_ssize_t len_data, len_y, len_exp_h;
+ Py_ssize_t len_data, len_y, len_exp_key;
+ const t_v_tables *v_tables;
- if (!PyArg_ParseTuple(args, "SSS", &data, &y, &exp_h)) {
+ if (!PyArg_ParseTuple(args, "SSS", &data, &y, &exp_key)) {
goto out;
}
len_data = PyBytes_GET_SIZE(data);
len_y = PyBytes_GET_SIZE(y);
- len_exp_h = PyBytes_GET_SIZE(exp_h);
+ len_exp_key = PyBytes_GET_SIZE(exp_key);
if (len_data%16!=0) {
PyErr_SetString(PyExc_ValueError, "Length of data must be a multiple of 16 bytes.");
@@ -347,7 +252,7 @@ ghash_function(PyObject *self, PyObject *args)
goto out;
}
- if (len_exp_h!=sizeof(t_key_tables) && len_exp_h!=16) {
+ if (len_exp_key!=sizeof(t_exp_key)) {
PyErr_SetString(PyExc_ValueError, "Length of expanded key is incorrect.");
goto out;
}
@@ -361,10 +266,10 @@ ghash_function(PyObject *self, PyObject *args)
Py_BEGIN_ALLOW_THREADS;
#define PyBytes_Buffer(a) (uint8_t*)PyBytes_AS_STRING(a)
-
+
+ v_tables = (const t_v_tables*)((t_exp_key *)PyBytes_AS_STRING(exp_key))->v_tables;
ghash( PyBytes_Buffer(retval), PyBytes_Buffer(data), len_data,
- PyBytes_Buffer(y),
- PyBytes_Buffer(exp_h), len_exp_h );
+ PyBytes_Buffer(y), v_tables);
#undef PyBytes_Buffer