diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-02-26 12:13:09 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2023-02-26 12:13:09 +0200 |
commit | a7ebf9d52e67015c0ae175c0a9bbff9da9fe6f32 (patch) | |
tree | f1180fb1d5e09883931d77c0886ccb6cc3feb5aa /cipher | |
parent | 161614ecc1c20d94a8599888524b1d1fd0eb2754 (diff) | |
download | libgcrypt-a7ebf9d52e67015c0ae175c0a9bbff9da9fe6f32.tar.gz |
aes-ppc: add ECB bulk acceleration for benchmarking purposes
* cipher/rijndael-ppc-functions.h (ECB_CRYPT_FUNC): New.
* cipher/rijndael-ppc.c (_gcry_aes_ppc8_ecb_crypt): New.
* cipher/rijndael-ppc9le.c (_gcry_aes_ppc9le_ecb_crypt): New.
* cipher/rijndael.c (_gcry_aes_ppc8_ecb_crypt)
(_gcry_aes_ppc9le_ecb_crypt): New.
(do_setkey): Set up _gcry_aes_ppc8_ecb_crypt for POWER8 and
_gcry_aes_ppc9le_ecb_crypt for POWER9.
--
Benchmark on POWER9:
Before:
AES | nanosecs/byte mebibytes/sec cycles/byte
ECB enc | 0.875 ns/B 1090 MiB/s 2.01 c/B
ECB dec | 1.06 ns/B 899.8 MiB/s 2.44 c/B
After:
AES | nanosecs/byte mebibytes/sec cycles/byte
ECB enc | 0.305 ns/B 3126 MiB/s 0.702 c/B
ECB dec | 0.305 ns/B 3126 MiB/s 0.702 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher')
-rw-r--r-- | cipher/rijndael-ppc-functions.h | 257 | ||||
-rw-r--r-- | cipher/rijndael-ppc.c | 1 | ||||
-rw-r--r-- | cipher/rijndael-ppc9le.c | 1 | ||||
-rw-r--r-- | cipher/rijndael.c | 10 |
4 files changed, 269 insertions, 0 deletions
diff --git a/cipher/rijndael-ppc-functions.h b/cipher/rijndael-ppc-functions.h index 063c5358..8a05d3c9 100644 --- a/cipher/rijndael-ppc-functions.h +++ b/cipher/rijndael-ppc-functions.h @@ -118,6 +118,263 @@ void CFB_ENC_FUNC (void *context, unsigned char *iv_arg, VEC_STORE_BE (iv_arg, 0, outiv, bige_const); } + +void ECB_CRYPT_FUNC (void *context, void *outbuf_arg, const void *inbuf_arg, + size_t nblocks, int encrypt) +{ + const block bige_const = asm_load_be_const(); + RIJNDAEL_context *ctx = context; + const u128_t *rk = encrypt ? (u128_t *)&ctx->keyschenc + : (u128_t *)&ctx->keyschdec; + const u128_t *in = (const u128_t *)inbuf_arg; + u128_t *out = (u128_t *)outbuf_arg; + int rounds = ctx->rounds; + ROUND_KEY_VARIABLES; + block b0, b1, b2, b3, b4, b5, b6, b7; + block rkey; + + if (!encrypt && !ctx->decryption_prepared) + { + internal_aes_ppc_prepare_decryption (ctx); + ctx->decryption_prepared = 1; + } + + PRELOAD_ROUND_KEYS (rounds); + + for (; nblocks >= 8; nblocks -= 8) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + b0 = asm_xor (rkey0, b0); + b1 = asm_xor (rkey0, b1); + b4 = VEC_LOAD_BE (in, 4, bige_const); + b5 = VEC_LOAD_BE (in, 5, bige_const); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + b6 = VEC_LOAD_BE (in, 6, bige_const); + b7 = VEC_LOAD_BE (in, 7, bige_const); + in += 8; + b4 = asm_xor (rkey0, b4); + b5 = asm_xor (rkey0, b5); + b6 = asm_xor (rkey0, b6); + b7 = asm_xor (rkey0, b7); + + if (encrypt) + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); \ + b4 = asm_cipher_be (b4, rkey); \ + b5 = asm_cipher_be (b5, rkey); \ + b6 = asm_cipher_be (b6, rkey); \ + b7 = asm_cipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, rkeylast); + b1 = asm_cipherlast_be (b1, rkeylast); + b2 = asm_cipherlast_be (b2, rkeylast); + b3 = asm_cipherlast_be (b3, rkeylast); + b4 = asm_cipherlast_be (b4, rkeylast); + b5 = asm_cipherlast_be (b5, rkeylast); + b6 = asm_cipherlast_be (b6, rkeylast); + b7 = asm_cipherlast_be (b7, rkeylast); + } + else + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); \ + b4 = asm_ncipher_be (b4, rkey); \ + b5 = asm_ncipher_be (b5, rkey); \ + b6 = asm_ncipher_be (b6, rkey); \ + b7 = asm_ncipher_be (b7, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } + +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, rkeylast); + b1 = asm_ncipherlast_be (b1, rkeylast); + b2 = asm_ncipherlast_be (b2, rkeylast); + b3 = asm_ncipherlast_be (b3, rkeylast); + b4 = asm_ncipherlast_be (b4, rkeylast); + b5 = asm_ncipherlast_be (b5, rkeylast); + b6 = asm_ncipherlast_be (b6, rkeylast); + b7 = asm_ncipherlast_be (b7, rkeylast); + } + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + VEC_STORE_BE (out, 4, b4, bige_const); + VEC_STORE_BE (out, 5, b5, bige_const); + VEC_STORE_BE (out, 6, b6, bige_const); + VEC_STORE_BE (out, 7, b7, bige_const); + out += 8; + } + + if (nblocks >= 4) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + b1 = VEC_LOAD_BE (in, 1, bige_const); + b2 = VEC_LOAD_BE (in, 2, bige_const); + b3 = VEC_LOAD_BE (in, 3, bige_const); + + b0 = asm_xor (rkey0, b0); + b1 = asm_xor (rkey0, b1); + b2 = asm_xor (rkey0, b2); + b3 = asm_xor (rkey0, b3); + + if (encrypt) + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_cipher_be (b0, rkey); \ + b1 = asm_cipher_be (b1, rkey); \ + b2 = asm_cipher_be (b2, rkey); \ + b3 = asm_cipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } +#undef DO_ROUND + + b0 = asm_cipherlast_be (b0, rkeylast); + b1 = asm_cipherlast_be (b1, rkeylast); + b2 = asm_cipherlast_be (b2, rkeylast); + b3 = asm_cipherlast_be (b3, rkeylast); + } + else + { +#define DO_ROUND(r) \ + rkey = ALIGNED_LOAD (rk, r); \ + b0 = asm_ncipher_be (b0, rkey); \ + b1 = asm_ncipher_be (b1, rkey); \ + b2 = asm_ncipher_be (b2, rkey); \ + b3 = asm_ncipher_be (b3, rkey); + + DO_ROUND(1); + DO_ROUND(2); + DO_ROUND(3); + DO_ROUND(4); + DO_ROUND(5); + DO_ROUND(6); + DO_ROUND(7); + DO_ROUND(8); + DO_ROUND(9); + if (rounds >= 12) + { + DO_ROUND(10); + DO_ROUND(11); + if (rounds > 12) + { + DO_ROUND(12); + DO_ROUND(13); + } + } +#undef DO_ROUND + + b0 = asm_ncipherlast_be (b0, rkeylast); + b1 = asm_ncipherlast_be (b1, rkeylast); + b2 = asm_ncipherlast_be (b2, rkeylast); + b3 = asm_ncipherlast_be (b3, rkeylast); + } + + VEC_STORE_BE (out, 0, b0, bige_const); + VEC_STORE_BE (out, 1, b1, bige_const); + VEC_STORE_BE (out, 2, b2, bige_const); + VEC_STORE_BE (out, 3, b3, bige_const); + + in += 4; + out += 4; + nblocks -= 4; + } + + for (; nblocks; nblocks--) + { + b0 = VEC_LOAD_BE (in, 0, bige_const); + + if (encrypt) + { + AES_ENCRYPT (b0, rounds); + } + else + { + AES_DECRYPT (b0, rounds); + } + + VEC_STORE_BE (out, 0, b0, bige_const); + + out++; + in++; + } +} + + void CFB_DEC_FUNC (void *context, unsigned char *iv_arg, void *outbuf_arg, const void *inbuf_arg, size_t nblocks) diff --git a/cipher/rijndael-ppc.c b/cipher/rijndael-ppc.c index 19f6a7e1..53c4f126 100644 --- a/cipher/rijndael-ppc.c +++ b/cipher/rijndael-ppc.c @@ -189,6 +189,7 @@ _gcry_aes_ppc8_prepare_decryption (RIJNDAEL_context *ctx) #define GCRY_AES_PPC8 1 #define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc8_encrypt #define DECRYPT_BLOCK_FUNC _gcry_aes_ppc8_decrypt +#define ECB_CRYPT_FUNC _gcry_aes_ppc8_ecb_crypt #define CFB_ENC_FUNC _gcry_aes_ppc8_cfb_enc #define CFB_DEC_FUNC _gcry_aes_ppc8_cfb_dec #define CBC_ENC_FUNC _gcry_aes_ppc8_cbc_enc diff --git a/cipher/rijndael-ppc9le.c b/cipher/rijndael-ppc9le.c index facdedd4..9ce9c224 100644 --- a/cipher/rijndael-ppc9le.c +++ b/cipher/rijndael-ppc9le.c @@ -88,6 +88,7 @@ asm_store_be_noswap(block vec, unsigned long offset, void *ptr) #define GCRY_AES_PPC9LE 1 #define ENCRYPT_BLOCK_FUNC _gcry_aes_ppc9le_encrypt #define DECRYPT_BLOCK_FUNC _gcry_aes_ppc9le_decrypt +#define ECB_CRYPT_FUNC _gcry_aes_ppc9le_ecb_crypt #define CFB_ENC_FUNC _gcry_aes_ppc9le_cfb_enc #define CFB_DEC_FUNC _gcry_aes_ppc9le_cfb_dec #define CBC_ENC_FUNC _gcry_aes_ppc9le_cbc_enc diff --git a/cipher/rijndael.c b/cipher/rijndael.c index 84cb7109..071d4a16 100644 --- a/cipher/rijndael.c +++ b/cipher/rijndael.c @@ -250,6 +250,10 @@ extern unsigned int _gcry_aes_ppc8_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); +extern void _gcry_aes_ppc8_ecb_crypt (void *context, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); + extern void _gcry_aes_ppc8_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); @@ -287,6 +291,10 @@ extern unsigned int _gcry_aes_ppc9le_decrypt(const RIJNDAEL_context *ctx, unsigned char *dst, const unsigned char *src); +extern void _gcry_aes_ppc9le_ecb_crypt (void *context, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, + int encrypt); + extern void _gcry_aes_ppc9le_cfb_enc (void *context, unsigned char *iv, void *outbuf_arg, const void *inbuf_arg, size_t nblocks); @@ -616,6 +624,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption; /* Setup PPC9LE bulk encryption routines. */ + bulk_ops->ecb_crypt = _gcry_aes_ppc9le_ecb_crypt; bulk_ops->cfb_enc = _gcry_aes_ppc9le_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ppc9le_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ppc9le_cbc_enc; @@ -645,6 +654,7 @@ do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen, ctx->prepare_decryption = _gcry_aes_ppc8_prepare_decryption; /* Setup PPC8 bulk encryption routines. */ + bulk_ops->ecb_crypt = _gcry_aes_ppc8_ecb_crypt; bulk_ops->cfb_enc = _gcry_aes_ppc8_cfb_enc; bulk_ops->cfb_dec = _gcry_aes_ppc8_cfb_dec; bulk_ops->cbc_enc = _gcry_aes_ppc8_cbc_enc; |