From fe8c8a126806fea4465c43d62a1f9d273a572bf5 Mon Sep 17 00:00:00 2001 From: Cesar Eduardo Barros Date: Mon, 25 Nov 2013 22:00:41 -0200 Subject: crypto: more robust crypto_memneq Disabling compiler optimizations can be fragile, since a new optimization could be added to -O0 or -Os that breaks the assumptions the code is making. Instead of disabling compiler optimizations, use a dummy inline assembly (based on RELOC_HIDE) to block the problematic kinds of optimization, while still allowing other optimizations to be applied to the code. The dummy inline assembly is added after every OR, and has the accumulator variable as its input and output. The compiler is forced to assume that the dummy inline assembly could both depend on the accumulator variable and change the accumulator variable, so it is forced to compute the value correctly before the inline assembly, and cannot assume anything about its value after the inline assembly. This change should be enough to make crypto_memneq work correctly (with data-independent timing) even if it is inlined at its call sites. That can be done later in a followup patch. Compile-tested on x86_64. Signed-off-by: Cesar Eduardo Barros Acked-by: Daniel Borkmann Signed-off-by: Herbert Xu --- crypto/Makefile | 5 ---- crypto/memneq.c | 79 +++++++++++++++++++++++++++++++++++++++------------------ 2 files changed, 54 insertions(+), 30 deletions(-) (limited to 'crypto') diff --git a/crypto/Makefile b/crypto/Makefile index 989c510da8cc..b29402a7b9b5 100644 --- a/crypto/Makefile +++ b/crypto/Makefile @@ -2,11 +2,6 @@ # Cryptographic API # -# memneq MUST be built with -Os or -O0 to prevent early-return optimizations -# that will defeat memneq's actual purpose to prevent timing attacks. -CFLAGS_REMOVE_memneq.o := -O1 -O2 -O3 -CFLAGS_memneq.o := -Os - obj-$(CONFIG_CRYPTO) += crypto.o crypto-y := api.o cipher.o compress.o memneq.o diff --git a/crypto/memneq.c b/crypto/memneq.c index cd0162221c14..570f6f3401ce 100644 --- a/crypto/memneq.c +++ b/crypto/memneq.c @@ -72,6 +72,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) while (size >= sizeof(unsigned long)) { neq |= *(unsigned long *)a ^ *(unsigned long *)b; + OPTIMIZER_HIDE_VAR(neq); a += sizeof(unsigned long); b += sizeof(unsigned long); size -= sizeof(unsigned long); @@ -79,6 +80,7 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ while (size > 0) { neq |= *(unsigned char *)a ^ *(unsigned char *)b; + OPTIMIZER_HIDE_VAR(neq); a += 1; b += 1; size -= 1; @@ -89,33 +91,60 @@ __crypto_memneq_generic(const void *a, const void *b, size_t size) /* Loop-free fast-path for frequently used 16-byte size */ static inline unsigned long __crypto_memneq_16(const void *a, const void *b) { + unsigned long neq = 0; + #ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS - if (sizeof(unsigned long) == 8) - return ((*(unsigned long *)(a) ^ *(unsigned long *)(b)) - | (*(unsigned long *)(a+8) ^ *(unsigned long *)(b+8))); - else if (sizeof(unsigned int) == 4) - return ((*(unsigned int *)(a) ^ *(unsigned int *)(b)) - | (*(unsigned int *)(a+4) ^ *(unsigned int *)(b+4)) - | (*(unsigned int *)(a+8) ^ *(unsigned int *)(b+8)) - | (*(unsigned int *)(a+12) ^ *(unsigned int *)(b+12))); - else + if (sizeof(unsigned long) == 8) { + neq |= *(unsigned long *)(a) ^ *(unsigned long *)(b); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned long *)(a+8) ^ *(unsigned long *)(b+8); + OPTIMIZER_HIDE_VAR(neq); + } else if (sizeof(unsigned int) == 4) { + neq |= *(unsigned int *)(a) ^ *(unsigned int *)(b); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+4) ^ *(unsigned int *)(b+4); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+8) ^ *(unsigned int *)(b+8); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12); + OPTIMIZER_HIDE_VAR(neq); + } else { #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ - return ((*(unsigned char *)(a) ^ *(unsigned char *)(b)) - | (*(unsigned char *)(a+1) ^ *(unsigned char *)(b+1)) - | (*(unsigned char *)(a+2) ^ *(unsigned char *)(b+2)) - | (*(unsigned char *)(a+3) ^ *(unsigned char *)(b+3)) - | (*(unsigned char *)(a+4) ^ *(unsigned char *)(b+4)) - | (*(unsigned char *)(a+5) ^ *(unsigned char *)(b+5)) - | (*(unsigned char *)(a+6) ^ *(unsigned char *)(b+6)) - | (*(unsigned char *)(a+7) ^ *(unsigned char *)(b+7)) - | (*(unsigned char *)(a+8) ^ *(unsigned char *)(b+8)) - | (*(unsigned char *)(a+9) ^ *(unsigned char *)(b+9)) - | (*(unsigned char *)(a+10) ^ *(unsigned char *)(b+10)) - | (*(unsigned char *)(a+11) ^ *(unsigned char *)(b+11)) - | (*(unsigned char *)(a+12) ^ *(unsigned char *)(b+12)) - | (*(unsigned char *)(a+13) ^ *(unsigned char *)(b+13)) - | (*(unsigned char *)(a+14) ^ *(unsigned char *)(b+14)) - | (*(unsigned char *)(a+15) ^ *(unsigned char *)(b+15))); + neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+2) ^ *(unsigned char *)(b+2); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+3) ^ *(unsigned char *)(b+3); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+4) ^ *(unsigned char *)(b+4); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+5) ^ *(unsigned char *)(b+5); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+6) ^ *(unsigned char *)(b+6); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+7) ^ *(unsigned char *)(b+7); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+8) ^ *(unsigned char *)(b+8); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+9) ^ *(unsigned char *)(b+9); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+10) ^ *(unsigned char *)(b+10); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+11) ^ *(unsigned char *)(b+11); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+12) ^ *(unsigned char *)(b+12); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+13) ^ *(unsigned char *)(b+13); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+14) ^ *(unsigned char *)(b+14); + OPTIMIZER_HIDE_VAR(neq); + neq |= *(unsigned char *)(a+15) ^ *(unsigned char *)(b+15); + OPTIMIZER_HIDE_VAR(neq); + } + + return neq; } /* Compare two areas of memory without leaking timing information, -- cgit v1.2.1 From 3110e4006ca9652276c2e17443949bcba7dbe789 Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Thu, 28 Nov 2013 19:20:04 +0100 Subject: crypto: pcrypt - Fix wrong usage of rcu_dereference() A kernel with enabled lockdep complains about the wrong usage of rcu_dereference() under a rcu_read_lock_bh() protected region. =============================== [ INFO: suspicious RCU usage. ] 3.13.0-rc1+ #126 Not tainted ------------------------------- linux/crypto/pcrypt.c:81 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 1 1 lock held by cryptomgr_test/153: #0: (rcu_read_lock_bh){.+....}, at: [] pcrypt_do_parallel.isra.2+0x5/0x200 Fix that by using rcu_dereference_bh() instead. Signed-off-by: Mathias Krause Cc: "David S. Miller" Acked-by: Steffen Klassert Signed-off-by: Herbert Xu --- crypto/pcrypt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'crypto') diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8c920cafe63..309d345ead95 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -78,7 +78,7 @@ static int pcrypt_do_parallel(struct padata_priv *padata, unsigned int *cb_cpu, cpu = *cb_cpu; rcu_read_lock_bh(); - cpumask = rcu_dereference(pcrypt->cb_cpumask); + cpumask = rcu_dereference_bh(pcrypt->cb_cpumask); if (cpumask_test_cpu(cpu, cpumask->mask)) goto out; -- cgit v1.2.1 From e37b94ebffdd93d121e5308eb93e2e55a0893fbb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 6 Dec 2013 00:33:33 +0100 Subject: crypto: memneq - fix for archs without efficient unaligned access Commit fe8c8a126806 introduced a possible build error for archs that do not have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS set. :/ Fix this up by bringing else braces outside of the ifdef. Reported-by: Fengguang Wu Fixes: fe8c8a126806 ("crypto: more robust crypto_memneq") Signed-off-by: Daniel Borkmann Acked-By: Cesar Eduardo Barros Signed-off-by: Herbert Xu --- crypto/memneq.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'crypto') diff --git a/crypto/memneq.c b/crypto/memneq.c index 570f6f3401ce..afed1bd16aee 100644 --- a/crypto/memneq.c +++ b/crypto/memneq.c @@ -108,8 +108,9 @@ static inline unsigned long __crypto_memneq_16(const void *a, const void *b) OPTIMIZER_HIDE_VAR(neq); neq |= *(unsigned int *)(a+12) ^ *(unsigned int *)(b+12); OPTIMIZER_HIDE_VAR(neq); - } else { + } else #endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + { neq |= *(unsigned char *)(a) ^ *(unsigned char *)(b); OPTIMIZER_HIDE_VAR(neq); neq |= *(unsigned char *)(a+1) ^ *(unsigned char *)(b+1); -- cgit v1.2.1 From 53f52d7aecb4cb3772872c902b73e0c685a56901 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 11 Dec 2013 14:28:47 -0800 Subject: crypto: tcrypt - Added speed tests for AEAD crypto alogrithms in tcrypt test suite Adding simple speed tests for a range of block sizes for AEAD crypto algorithms. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 270 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ crypto/tcrypt.h | 10 +++ 2 files changed, 280 insertions(+) (limited to 'crypto') diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 001f07cdb828..0d9003ae8c61 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -137,7 +137,272 @@ out: return ret; } +static int test_aead_jiffies(struct aead_request *req, int enc, + int blen, int sec) +{ + unsigned long start, end; + int bcount; + int ret; + + for (start = jiffies, end = start + sec * HZ, bcount = 0; + time_before(jiffies, end); bcount++) { + if (enc) + ret = crypto_aead_encrypt(req); + else + ret = crypto_aead_decrypt(req); + + if (ret) + return ret; + } + + printk("%d operations in %d seconds (%ld bytes)\n", + bcount, sec, (long)bcount * blen); + return 0; +} + +static int test_aead_cycles(struct aead_request *req, int enc, int blen) +{ + unsigned long cycles = 0; + int ret = 0; + int i; + + local_irq_disable(); + + /* Warm-up run. */ + for (i = 0; i < 4; i++) { + if (enc) + ret = crypto_aead_encrypt(req); + else + ret = crypto_aead_decrypt(req); + + if (ret) + goto out; + } + + /* The real thing. */ + for (i = 0; i < 8; i++) { + cycles_t start, end; + + start = get_cycles(); + if (enc) + ret = crypto_aead_encrypt(req); + else + ret = crypto_aead_decrypt(req); + end = get_cycles(); + + if (ret) + goto out; + + cycles += end - start; + } + +out: + local_irq_enable(); + + if (ret == 0) + printk("1 operation in %lu cycles (%d bytes)\n", + (cycles + 4) / 8, blen); + + return ret; +} + static u32 block_sizes[] = { 16, 64, 256, 1024, 8192, 0 }; +static u32 aead_sizes[] = { 16, 64, 256, 512, 1024, 2048, 4096, 8192, 0 }; + +#define XBUFSIZE 8 +#define MAX_IVLEN 32 + +static int testmgr_alloc_buf(char *buf[XBUFSIZE]) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) { + buf[i] = (void *)__get_free_page(GFP_KERNEL); + if (!buf[i]) + goto err_free_buf; + } + + return 0; + +err_free_buf: + while (i-- > 0) + free_page((unsigned long)buf[i]); + + return -ENOMEM; +} + +static void testmgr_free_buf(char *buf[XBUFSIZE]) +{ + int i; + + for (i = 0; i < XBUFSIZE; i++) + free_page((unsigned long)buf[i]); +} + +static void sg_init_aead(struct scatterlist *sg, char *xbuf[XBUFSIZE], + unsigned int buflen) +{ + int np = (buflen + PAGE_SIZE - 1)/PAGE_SIZE; + int k, rem; + + np = (np > XBUFSIZE) ? XBUFSIZE : np; + rem = buflen % PAGE_SIZE; + if (np > XBUFSIZE) { + rem = PAGE_SIZE; + np = XBUFSIZE; + } + sg_init_table(sg, np); + for (k = 0; k < np; ++k) { + if (k == (np-1)) + sg_set_buf(&sg[k], xbuf[k], rem); + else + sg_set_buf(&sg[k], xbuf[k], PAGE_SIZE); + } +} + +static void test_aead_speed(const char *algo, int enc, unsigned int sec, + struct aead_speed_template *template, + unsigned int tcount, u8 authsize, + unsigned int aad_size, u8 *keysize) +{ + unsigned int i, j; + struct crypto_aead *tfm; + int ret = -ENOMEM; + const char *key; + struct aead_request *req; + struct scatterlist *sg; + struct scatterlist *asg; + struct scatterlist *sgout; + const char *e; + void *assoc; + char iv[MAX_IVLEN]; + char *xbuf[XBUFSIZE]; + char *xoutbuf[XBUFSIZE]; + char *axbuf[XBUFSIZE]; + unsigned int *b_size; + unsigned int iv_len; + + if (enc == ENCRYPT) + e = "encryption"; + else + e = "decryption"; + + if (testmgr_alloc_buf(xbuf)) + goto out_noxbuf; + if (testmgr_alloc_buf(axbuf)) + goto out_noaxbuf; + if (testmgr_alloc_buf(xoutbuf)) + goto out_nooutbuf; + + sg = kmalloc(sizeof(*sg) * 8 * 3, GFP_KERNEL); + if (!sg) + goto out_nosg; + asg = &sg[8]; + sgout = &asg[8]; + + + printk(KERN_INFO "\ntesting speed of %s %s\n", algo, e); + + tfm = crypto_alloc_aead(algo, 0, 0); + + if (IS_ERR(tfm)) { + pr_err("alg: aead: Failed to load transform for %s: %ld\n", algo, + PTR_ERR(tfm)); + return; + } + + req = aead_request_alloc(tfm, GFP_KERNEL); + if (!req) { + pr_err("alg: aead: Failed to allocate request for %s\n", + algo); + goto out; + } + + i = 0; + do { + b_size = aead_sizes; + do { + assoc = axbuf[0]; + + if (aad_size < PAGE_SIZE) + memset(assoc, 0xff, aad_size); + else { + pr_err("associate data length (%u) too big\n", + aad_size); + goto out_nosg; + } + sg_init_one(&asg[0], assoc, aad_size); + + if ((*keysize + *b_size) > TVMEMSIZE * PAGE_SIZE) { + pr_err("template (%u) too big for tvmem (%lu)\n", + *keysize + *b_size, + TVMEMSIZE * PAGE_SIZE); + goto out; + } + + key = tvmem[0]; + for (j = 0; j < tcount; j++) { + if (template[j].klen == *keysize) { + key = template[j].key; + break; + } + } + ret = crypto_aead_setkey(tfm, key, *keysize); + ret = crypto_aead_setauthsize(tfm, authsize); + + iv_len = crypto_aead_ivsize(tfm); + if (iv_len) + memset(&iv, 0xff, iv_len); + + crypto_aead_clear_flags(tfm, ~0); + printk(KERN_INFO "test %u (%d bit key, %d byte blocks): ", + i, *keysize * 8, *b_size); + + + memset(tvmem[0], 0xff, PAGE_SIZE); + + if (ret) { + pr_err("setkey() failed flags=%x\n", + crypto_aead_get_flags(tfm)); + goto out; + } + + sg_init_aead(&sg[0], xbuf, + *b_size + (enc ? authsize : 0)); + + sg_init_aead(&sgout[0], xoutbuf, + *b_size + (enc ? authsize : 0)); + + aead_request_set_crypt(req, sg, sgout, *b_size, iv); + aead_request_set_assoc(req, asg, aad_size); + + if (sec) + ret = test_aead_jiffies(req, enc, *b_size, sec); + else + ret = test_aead_cycles(req, enc, *b_size); + + if (ret) { + pr_err("%s() failed return code=%d\n", e, ret); + break; + } + b_size++; + i++; + } while (*b_size); + keysize++; + } while (*keysize); + +out: + crypto_free_aead(tfm); + kfree(sg); +out_nosg: + testmgr_free_buf(xoutbuf); +out_nooutbuf: + testmgr_free_buf(axbuf); +out_noaxbuf: + testmgr_free_buf(xbuf); +out_noxbuf: + return; +} static void test_cipher_speed(const char *algo, int enc, unsigned int sec, struct cipher_speed_template *template, @@ -1427,6 +1692,11 @@ static int do_test(int m) speed_template_32_64); break; + case 211: + test_aead_speed("rfc4106(gcm(aes))", ENCRYPT, sec, + NULL, 0, 16, 8, aead_speed_template_20); + break; + case 300: /* fall through */ diff --git a/crypto/tcrypt.h b/crypto/tcrypt.h index ecdeeb1a7b05..6c7e21a09f78 100644 --- a/crypto/tcrypt.h +++ b/crypto/tcrypt.h @@ -22,6 +22,11 @@ struct cipher_speed_template { unsigned int klen; }; +struct aead_speed_template { + const char *key; + unsigned int klen; +}; + struct hash_speed { unsigned int blen; /* buffer length */ unsigned int plen; /* per-update length */ @@ -57,6 +62,11 @@ static u8 speed_template_32_48[] = {32, 48, 0}; static u8 speed_template_32_48_64[] = {32, 48, 64, 0}; static u8 speed_template_32_64[] = {32, 64, 0}; +/* + * AEAD speed tests + */ +static u8 aead_speed_template_20[] = {20, 0}; + /* * Digest speed tests */ -- cgit v1.2.1 From 1d9a394b97b833d3ab37f49caf12d0be3c88050b Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Tue, 10 Dec 2013 20:26:19 +0100 Subject: crypto: ahash - Fully restore ahash request before completing When finishing the ahash request, the ahash_op_unaligned_done() will call complete() on the request. Yet, this will not call the correct complete callback. The correct complete callback was previously stored in the requests' private data, as seen in ahash_op_unaligned(). This patch restores the correct complete callback and .data field of the request before calling complete() on it. Signed-off-by: Marek Vasut Cc: David S. Miller Cc: Fabio Estevam Cc: Shawn Guo Signed-off-by: Herbert Xu --- crypto/ahash.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'crypto') diff --git a/crypto/ahash.c b/crypto/ahash.c index 793a27f2493e..a92dc382f781 100644 --- a/crypto/ahash.c +++ b/crypto/ahash.c @@ -213,7 +213,10 @@ static void ahash_op_unaligned_done(struct crypto_async_request *req, int err) ahash_op_unaligned_finish(areq, err); - complete(data, err); + areq->base.complete = complete; + areq->base.data = data; + + complete(&areq->base, err); } static int ahash_op_unaligned(struct ahash_request *req, -- cgit v1.2.1