diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-07-27 21:29:57 +0300 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-07-31 18:03:35 +0300 |
commit | 2ac6c24aa53024eb415d49f52229e868f72f47f8 (patch) | |
tree | 68bea988a5d46c963d23d756228f9e536d2d5e6a /cipher | |
parent | e51d3b8330a1d4b15e3484df90646e075c02f54b (diff) | |
download | libgcrypt-2ac6c24aa53024eb415d49f52229e868f72f47f8.tar.gz |
rijndael-ppc: small speed-up for CBC and CFB encryption
* cipher/rijndael-ppc-common.h (AES_ENCRYPT_ALL): Remove.
* cipher/rijndael-ppc-functions.h (CFB_ENC_FUNC)
(CBC_ENC_FUNC): Remove the two-block unrolled loop; optimize the
single-block loop for a shorter critical path.
--
This patch gives a small (~3%) performance increase for CBC and CFB
encryption, as tested on POWER8.
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- | cipher/rijndael-ppc-common.h | 25 | ||||
-rw-r--r-- | cipher/rijndael-ppc-functions.h | 133 |
2 files changed, 75 insertions, 83 deletions
diff --git a/cipher/rijndael-ppc-common.h b/cipher/rijndael-ppc-common.h index bbbeaac0..3fa9a0b9 100644 --- a/cipher/rijndael-ppc-common.h +++ b/cipher/rijndael-ppc-common.h @@ -158,31 +158,6 @@ typedef union rkeylast = ALIGNED_LOAD (rk, nrounds); \ } while (0) -#define AES_ENCRYPT_ALL(blk, nrounds) \ - do { \ - blk ^= rkey0; \ - blk = asm_cipher_be (blk, rkey1); \ - blk = asm_cipher_be (blk, rkey2); \ - blk = asm_cipher_be (blk, rkey3); \ - blk = asm_cipher_be (blk, rkey4); \ - blk = asm_cipher_be (blk, rkey5); \ - blk = asm_cipher_be (blk, rkey6); \ - blk = asm_cipher_be (blk, rkey7); \ - blk = asm_cipher_be (blk, rkey8); \ - blk = asm_cipher_be (blk, rkey9); \ - if (nrounds >= 12) \ - { \ - blk = asm_cipher_be (blk, rkey10); \ - blk = asm_cipher_be (blk, rkey11); \ - if (rounds > 12) \ - { \ - blk = asm_cipher_be (blk, rkey12); \ - blk = asm_cipher_be (blk, rkey13); \ - } \ - } \ - blk = asm_cipherlast_be (blk, rkeylast); \ - } while (0) - static ASM_FUNC_ATTR_INLINE block asm_aligned_ld(unsigned long offset, const void *ptr) diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h index 72f31852..23fa4206 100644 --- a/cipher/rijndael-ppc-functions.h +++ b/cipher/rijndael-ppc-functions.h @@ -1,6 +1,6 @@ /* Rijndael (AES) for GnuPG - PowerPC Vector Crypto AES implementation * Copyright (C) 2019 Shawn Landden <shawn@git.icu> - * Copyright (C) 2019-2020 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (C) 2019-2020, 2022 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. 
* @@ -76,43 +76,46 @@ void CFB_ENC_FUNC (void *context, unsigned char *iv_arg, u128_t *out = (u128_t *)outbuf_arg; int rounds = ctx->rounds; ROUND_KEY_VARIABLES_ALL; - block rkeylast_orig; - block iv; + block key0_xor_keylast; + block iv, outiv; iv = VEC_LOAD_BE (iv_arg, 0, bige_const); + outiv = iv; PRELOAD_ROUND_KEYS_ALL (rounds); - rkeylast_orig = rkeylast; - - for (; nblocks >= 2; nblocks -= 2) - { - block in2, iv1; - - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in, 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, 0, bige_const); - in += 2; - - AES_ENCRYPT_ALL (iv, rounds); - - iv1 = iv; - rkeylast = rkeylast_orig ^ in2; - - AES_ENCRYPT_ALL (iv, rounds); - - VEC_STORE_BE (out++, 0, iv1, bige_const); - VEC_STORE_BE (out++, 0, iv, bige_const); - } + key0_xor_keylast = rkey0 ^ rkeylast; + iv ^= rkey0; for (; nblocks; nblocks--) { - rkeylast = rkeylast_orig ^ VEC_LOAD_BE (in++, 0, bige_const); - - AES_ENCRYPT_ALL (iv, rounds); + rkeylast = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const); + + iv = asm_cipher_be (iv, rkey1); + iv = asm_cipher_be (iv, rkey2); + iv = asm_cipher_be (iv, rkey3); + iv = asm_cipher_be (iv, rkey4); + iv = asm_cipher_be (iv, rkey5); + iv = asm_cipher_be (iv, rkey6); + iv = asm_cipher_be (iv, rkey7); + iv = asm_cipher_be (iv, rkey8); + iv = asm_cipher_be (iv, rkey9); + if (rounds >= 12) + { + iv = asm_cipher_be (iv, rkey10); + iv = asm_cipher_be (iv, rkey11); + if (rounds > 12) + { + iv = asm_cipher_be (iv, rkey12); + iv = asm_cipher_be (iv, rkey13); + } + } + iv = asm_cipherlast_be (iv, rkeylast); - VEC_STORE_BE (out++, 0, iv, bige_const); + outiv = rkey0 ^ iv; + VEC_STORE_BE (out++, 0, outiv, bige_const); } - VEC_STORE_BE (iv_arg, 0, iv, bige_const); + VEC_STORE_BE (iv_arg, 0, outiv, bige_const); } void CFB_DEC_FUNC (void *context, unsigned char *iv_arg, @@ -324,47 +327,61 @@ void CBC_ENC_FUNC (void *context, unsigned char *iv_arg, byte *out = (byte *)outbuf_arg; int rounds = ctx->rounds; ROUND_KEY_VARIABLES_ALL; - block lastiv, b; + block 
iv, key0_xor_keylast, nextiv, outiv; unsigned int outadd = -(!cbc_mac) & 16; - lastiv = VEC_LOAD_BE (iv_arg, 0, bige_const); - - PRELOAD_ROUND_KEYS_ALL (rounds); - - for (; nblocks >= 2; nblocks -= 2) - { - block in2, lastiv1; - - b = lastiv ^ VEC_LOAD_BE (in, 0, bige_const); - in2 = VEC_LOAD_BE (in + 1, 0, bige_const); - in += 2; - - AES_ENCRYPT_ALL (b, rounds); + if (nblocks == 0) /* CMAC may call with nblocks 0. */ + return; - lastiv1 = b; - b = lastiv1 ^ in2; + iv = VEC_LOAD_BE (iv_arg, 0, bige_const); - AES_ENCRYPT_ALL (b, rounds); + PRELOAD_ROUND_KEYS_ALL (rounds); + key0_xor_keylast = rkey0 ^ rkeylast; - lastiv = b; - VEC_STORE_BE ((u128_t *)out, 0, lastiv1, bige_const); - out += outadd; - VEC_STORE_BE ((u128_t *)out, 0, lastiv, bige_const); - out += outadd; - } + nextiv = VEC_LOAD_BE (in++, 0, bige_const); + iv ^= rkey0 ^ nextiv; - for (; nblocks; nblocks--) + do { - b = lastiv ^ VEC_LOAD_BE (in++, 0, bige_const); - - AES_ENCRYPT_ALL (b, rounds); + if (--nblocks) + { + nextiv = key0_xor_keylast ^ VEC_LOAD_BE (in++, 0, bige_const); + } - lastiv = b; - VEC_STORE_BE ((u128_t *)out, 0, b, bige_const); + iv = asm_cipher_be (iv, rkey1); + iv = asm_cipher_be (iv, rkey2); + iv = asm_cipher_be (iv, rkey3); + iv = asm_cipher_be (iv, rkey4); + iv = asm_cipher_be (iv, rkey5); + iv = asm_cipher_be (iv, rkey6); + iv = asm_cipher_be (iv, rkey7); + iv = asm_cipher_be (iv, rkey8); + iv = asm_cipher_be (iv, rkey9); + if (rounds >= 12) + { + iv = asm_cipher_be (iv, rkey10); + iv = asm_cipher_be (iv, rkey11); + if (rounds > 12) + { + iv = asm_cipher_be (iv, rkey12); + iv = asm_cipher_be (iv, rkey13); + } + } + outiv = iv; + /* Proper order for following instructions is important for best + * performance on POWER8: the output path vcipherlast needs to be + * last one. 
*/ + __asm__ volatile ("vcipherlast %0, %0, %2\n\t" + "vcipherlast %1, %1, %3\n\t" + : "+v" (iv), "+v" (outiv) + : "v" (nextiv), "v" (rkeylast)); + + VEC_STORE_BE ((u128_t *)out, 0, outiv, bige_const); out += outadd; } + while (nblocks); - VEC_STORE_BE (iv_arg, 0, lastiv, bige_const); + VEC_STORE_BE (iv_arg, 0, outiv, bige_const); } void CBC_DEC_FUNC (void *context, unsigned char *iv_arg, |