summary refs log tree commit diff
path: root/cipher
diff options
context:
space:
mode:
author Jussi Kivilinna <jussi.kivilinna@iki.fi> 2022-07-27 21:29:57 +0300
committer Jussi Kivilinna <jussi.kivilinna@iki.fi> 2022-07-31 18:03:35 +0300
commit 2ac6c24aa53024eb415d49f52229e868f72f47f8 (patch)
tree 68bea988a5d46c963d23d756228f9e536d2d5e6a /cipher
parent e51d3b8330a1d4b15e3484df90646e075c02f54b (diff)
download libgcrypt-2ac6c24aa53024eb415d49f52229e868f72f47f8.tar.gz
rijndael-ppc: small speed-up for CBC and CFB encryption
* cipher/rijndael-ppc-common.h (AES_ENCRYPT_ALL): Remove.
* cipher/rijndael-ppc-functions.h (CFB_ENC_FUNC)
(CBC_ENC_FUNC): Removed two block unrolled loop; Optimized single
block loop for shorter critical-path.
--
Patch gives small ~3% performance increase for CBC and CFB encryption,
tested with POWER8.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- cipher/rijndael-ppc-common.h 25
-rw-r--r-- cipher/rijndael-ppc-functions.h 133
2 files changed, 75 insertions, 83 deletions
diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h
index bbbeaac0..3fa9a0b9 100644
--- a/cipher/rijndael-ppc-common.h
+++ b/cipher/rijndael-ppc-common.h
@@ -158,31 +158,6 @@ typedef union
rkeylast = ALIGNED_LOAD (rk, nrounds); \
} while (0)
-#define AES_ENCRYPT_ALL(blk, nrounds) \
- do { \
- blk ^= rkey0; \
- blk = asm_cipher_be (blk, rkey1); \
- blk = asm_cipher_be (blk, rkey2); \
- blk = asm_cipher_be (blk, rkey3); \
- blk = asm_cipher_be (blk, rkey4); \
- blk = asm_cipher_be (blk, rkey5); \
- blk = asm_cipher_be (blk, rkey6); \
- blk = asm_cipher_be (blk, rkey7); \
- blk = asm_cipher_be (blk, rkey8); \
- blk = asm_cipher_be (blk, rkey9); \
- if (nrounds >= 12) \
- { \
- blk = asm_cipher_be (blk, rkey10); \
- blk = asm_cipher_be (blk, rkey11); \
- if (rounds > 12) \
- { \
- blk = asm_cipher_be (blk, rkey12); \
- blk = asm_cipher_be (blk, rkey13); \
- } \
- } \
- blk = asm_cipherlast_be (blk, rkeylast); \
- } while (0)
-
static ASM_FUNC_ATTR_INLINE block
asm_aligned_ld(unsigned long offset, const void *ptr)
diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h
index 72f31852..23fa4206 100644
--- a/cipher/rijndael-ppc-functions.h
+++ b/cipher/rijndael-ppc-functions.h
@@ -1,6 +1,6 @@
/* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation
* Copyright (C) 2019 Shawn Landden <shawn@git.icu>
- * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ * Copyright (C) 2019-2020, 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
@@ -76,43 +76,46 @@ void CFB_ENC_FUNC (void *context, unsigned char *iv_arg,
u128_t *out = (u128_t *)outbuf_arg;
int rounds = ctx->rounds;
ROUND_KEY_VARIABLES_ALL;
- block rkeylast_orig;
- block iv;
+ block key0_xor_keylast;
+ block iv, outiv;
iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
+ outiv = iv;
PRELOAD_ROUND_KEYS_ALL (rounds);
- rkeylast_orig = rkeylast;
-
- for (; nblocks >= 2; nblocks -= 2)
- {
- block in2, iv1;
-
- rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const);
- in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
- in += 2;
-
- AES_ENCRYPT_ALL (iv, rounds);
-
- iv1 = iv;
- rkeylast = rkeylast_orig ^ in2;
-
- AES_ENCRYPT_ALL (iv, rounds);
-
- VEC_STORE_BE (out++, 0, iv1, bige_const);
- VEC_STORE_BE (out++, 0, iv, bige_const);
- }
+ key0_xor_keylast = rkey0 ^ rkeylast;
+ iv ^= rkey0;
for (; nblocks; nblocks--)
{
- rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const);
-
- AES_ENCRYPT_ALL (iv, rounds);
+ rkeylast = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const);
+
+ iv = asm_cipher_be (iv, rkey1);
+ iv = asm_cipher_be (iv, rkey2);
+ iv = asm_cipher_be (iv, rkey3);
+ iv = asm_cipher_be (iv, rkey4);
+ iv = asm_cipher_be (iv, rkey5);
+ iv = asm_cipher_be (iv, rkey6);
+ iv = asm_cipher_be (iv, rkey7);
+ iv = asm_cipher_be (iv, rkey8);
+ iv = asm_cipher_be (iv, rkey9);
+ if (rounds >= 12)
+ {
+ iv = asm_cipher_be (iv, rkey10);
+ iv = asm_cipher_be (iv, rkey11);
+ if (rounds > 12)
+ {
+ iv = asm_cipher_be (iv, rkey12);
+ iv = asm_cipher_be (iv, rkey13);
+ }
+ }
+ iv = asm_cipherlast_be (iv, rkeylast);
- VEC_STORE_BE (out++, 0, iv, bige_const);
+ outiv = rkey0 ^ iv;
+ VEC_STORE_BE (out++, 0, outiv, bige_const);
}
- VEC_STORE_BE (iv_arg, 0, iv, bige_const);
+ VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
}
void CFB_DEC_FUNC (void *context, unsigned char *iv_arg,
@@ -324,47 +327,61 @@ void CBC_ENC_FUNC (void *context, unsigned char *iv_arg,
byte *out = (byte *)outbuf_arg;
int rounds = ctx->rounds;
ROUND_KEY_VARIABLES_ALL;
- block lastiv, b;
+ block iv, key0_xor_keylast, nextiv, outiv;
unsigned int outadd = -(!cbc_mac) & 16;
- lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const);
-
- PRELOAD_ROUND_KEYS_ALL (rounds);
-
- for (; nblocks >= 2; nblocks -= 2)
- {
- block in2, lastiv1;
-
- b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const);
- in2 = VEC_LOAD_BE (in + 1, 0, bige_const);
- in += 2;
-
- AES_ENCRYPT_ALL (b, rounds);
+ if (nblocks == 0) /* CMAC may call with nblocks 0. */
+ return;
- lastiv1 = b;
- b = lastiv1 ^ in2;
+ iv = VEC_LOAD_BE (iv_arg, 0, bige_const);
- AES_ENCRYPT_ALL (b, rounds);
+ PRELOAD_ROUND_KEYS_ALL (rounds);
+ key0_xor_keylast = rkey0 ^ rkeylast;
- lastiv = b;
- VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const);
- out += outadd;
- VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const);
- out += outadd;
- }
+ nextiv = VEC_LOAD_BE (in++, 0, bige_const);
+ iv ^= rkey0 ^ nextiv;
- for (; nblocks; nblocks--)
+ do
{
- b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const);
-
- AES_ENCRYPT_ALL (b, rounds);
+ if (--nblocks)
+ {
+ nextiv = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const);
+ }
- lastiv = b;
- VEC_STORE_BE ((u128_t *)out, 0, b, bige_const);
+ iv = asm_cipher_be (iv, rkey1);
+ iv = asm_cipher_be (iv, rkey2);
+ iv = asm_cipher_be (iv, rkey3);
+ iv = asm_cipher_be (iv, rkey4);
+ iv = asm_cipher_be (iv, rkey5);
+ iv = asm_cipher_be (iv, rkey6);
+ iv = asm_cipher_be (iv, rkey7);
+ iv = asm_cipher_be (iv, rkey8);
+ iv = asm_cipher_be (iv, rkey9);
+ if (rounds >= 12)
+ {
+ iv = asm_cipher_be (iv, rkey10);
+ iv = asm_cipher_be (iv, rkey11);
+ if (rounds > 12)
+ {
+ iv = asm_cipher_be (iv, rkey12);
+ iv = asm_cipher_be (iv, rkey13);
+ }
+ }
+ outiv = iv;
+ /* Proper order for following instructions is important for best
+ * performance on POWER8: the output path vcipherlast needs to be
+ * last one. */
+ __asm__ volatile ("vcipherlast %0, %0, %2\n\t"
+ "vcipherlast %1, %1, %3\n\t"
+ : "+v" (iv), "+v" (outiv)
+ : "v" (nextiv), "v" (rkeylast));
+
+ VEC_STORE_BE ((u128_t *)out, 0, outiv, bige_const);
out += outadd;
}
+ while (nblocks);
- VEC_STORE_BE (iv_arg, 0, lastiv, bige_const);
+ VEC_STORE_BE (iv_arg, 0, outiv, bige_const);
}
void CBC_DEC_FUNC (void *context, unsigned char *iv_arg,