summaryrefslogtreecommitdiff
path: root/cipher/camellia-glue.c
diff options
context:
space:
mode:
author: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-02-26 21:15:36 +0200
committer: Jussi Kivilinna <jussi.kivilinna@iki.fi> 2023-02-28 16:14:35 +0200
commit: 898c857206ada06d70c5f46ac5adaa9d7058e672 (patch)
tree: 7248f49dee5fabc13354230fba2ac4ffdd1ad5a8 /cipher/camellia-glue.c
parent: 6fa11d8b7070eb7c4c296c879213c9596bd00b1c (diff)
download: libgcrypt-898c857206ada06d70c5f46ac5adaa9d7058e672.tar.gz
camellia: add AArch64 crypto-extension implementation
* cipher/Makefile.am: Add 'camellia-aarch64-ce.(c|o|lo)'.
(aarch64_neon_cflags): New.
* cipher/camellia-aarch64-ce.c: New.
* cipher/camellia-glue.c (USE_AARCH64_CE): New.
(CAMELLIA_context): Add 'use_aarch64ce'.
(_gcry_camellia_aarch64ce_encrypt_blk16)
(_gcry_camellia_aarch64ce_decrypt_blk16)
(_gcry_camellia_aarch64ce_keygen, camellia_aarch64ce_enc_blk16)
(camellia_aarch64ce_dec_blk16, aarch64ce_burn_stack_depth): New.
(camellia_setkey) [USE_AARCH64_CE]: Set use_aarch64ce if HW has
HWF_ARM_AES; Use AArch64/CE key generation if supported by HW.
(camellia_encrypt_blk1_32, camellia_decrypt_blk1_32)
[USE_AARCH64_CE]: Add AArch64/CE code path.
--

Patch enables 128-bit vector intrinsics implementation of Camellia
cipher for AArch64.

Benchmark on AWS Graviton2:

Before:
CAMELLIA128 | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
ECB enc | 5.99 ns/B 159.2 MiB/s 14.97 c/B 2500
ECB dec | 5.99 ns/B 159.1 MiB/s 14.98 c/B 2500
CBC enc | 6.16 ns/B 154.7 MiB/s 15.41 c/B 2500
CBC dec | 6.12 ns/B 155.8 MiB/s 15.29 c/B 2499
CFB enc | 6.49 ns/B 147.0 MiB/s 16.21 c/B 2500
CFB dec | 6.05 ns/B 157.6 MiB/s 15.13 c/B 2500
CTR enc | 6.09 ns/B 156.7 MiB/s 15.22 c/B 2500
CTR dec | 6.09 ns/B 156.6 MiB/s 15.22 c/B 2500
XTS enc | 6.16 ns/B 154.9 MiB/s 15.39 c/B 2500
XTS dec | 6.16 ns/B 154.8 MiB/s 15.40 c/B 2499
GCM enc | 6.31 ns/B 151.1 MiB/s 15.78 c/B 2500
GCM dec | 6.31 ns/B 151.1 MiB/s 15.78 c/B 2500
GCM auth | 0.206 ns/B 4635 MiB/s 0.514 c/B 2500
OCB enc | 6.63 ns/B 143.9 MiB/s 16.57 c/B 2499
OCB dec | 6.63 ns/B 143.9 MiB/s 16.56 c/B 2499
OCB auth | 6.55 ns/B 145.7 MiB/s 16.37 c/B 2499

After (ecb ~2.1x faster):
CAMELLIA128 | nanosecs/byte mebibytes/sec cycles/byte auto Mhz
ECB enc | 2.77 ns/B 344.2 MiB/s 6.93 c/B 2499
ECB dec | 2.76 ns/B 345.3 MiB/s 6.90 c/B 2499
CBC enc | 6.17 ns/B 154.7 MiB/s 15.41 c/B 2499
CBC dec | 2.89 ns/B 330.3 MiB/s 7.22 c/B 2500
CFB enc | 6.48 ns/B 147.1 MiB/s 16.21 c/B 2499
CFB dec | 2.84 ns/B 336.1 MiB/s 7.09 c/B 2499
CTR enc | 2.90 ns/B 328.8 MiB/s 7.25 c/B 2499
CTR dec | 2.90 ns/B 328.9 MiB/s 7.25 c/B 2500
XTS enc | 2.93 ns/B 325.3 MiB/s 7.33 c/B 2500
XTS dec | 2.92 ns/B 326.2 MiB/s 7.31 c/B 2500
GCM enc | 3.10 ns/B 307.2 MiB/s 7.76 c/B 2500
GCM dec | 3.10 ns/B 307.2 MiB/s 7.76 c/B 2499
GCM auth | 0.206 ns/B 4635 MiB/s 0.514 c/B 2500

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'cipher/camellia-glue.c')
-rw-r--r-- cipher/camellia-glue.c 70
1 files changed, 70 insertions, 0 deletions
diff --git a/cipher/camellia-glue.c b/cipher/camellia-glue.c
index 46bbe182..0b07f2d1 100644
--- a/cipher/camellia-glue.c
+++ b/cipher/camellia-glue.c
@@ -119,6 +119,16 @@
# define USE_PPC_CRYPTO 1
#endif
+/* USE_AARCH64_CE indicates whether to enable ARMv8/CE accelerated code. */
+#undef USE_AARCH64_CE
+#if defined(__AARCH64EL__) && \
+ defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+ defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) && \
+ defined(HAVE_COMPATIBLE_CC_AARCH64_NEON_INTRINSICS) && \
+ (__GNUC__ >= 4)
+# define USE_AARCH64_CE 1
+#endif
+
typedef struct
{
KEY_TABLE_TYPE keytable;
@@ -138,6 +148,9 @@ typedef struct
unsigned int use_ppc8:1;
unsigned int use_ppc9:1;
#endif /*USE_PPC_CRYPTO*/
+#ifdef USE_AARCH64_CE
+ unsigned int use_aarch64ce:1;
+#endif /*USE_AARCH64_CE*/
} CAMELLIA_context;
/* Assembly implementations use SystemV ABI, ABI conversion and additional
@@ -472,6 +485,36 @@ static const int ppc_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 +
2 * sizeof(void *);
#endif /*USE_PPC_CRYPTO*/
+#ifdef USE_AARCH64_CE
+extern void _gcry_camellia_aarch64ce_encrypt_blk16(const void *key_table,
+ void *out, const void *in,
+ int key_length);
+
+extern void _gcry_camellia_aarch64ce_decrypt_blk16(const void *key_table,
+ void *out, const void *in,
+ int key_length);
+
+extern void _gcry_camellia_aarch64ce_keygen(void *key_table, const void *vkey,
+ unsigned int keylen);
+
+void camellia_aarch64ce_enc_blk16(const CAMELLIA_context *ctx,
+ unsigned char *out, const unsigned char *in)
+{
+ _gcry_camellia_aarch64ce_encrypt_blk16 (ctx->keytable, out, in,
+ ctx->keybitlength / 8);
+}
+
+void camellia_aarch64ce_dec_blk16(const CAMELLIA_context *ctx,
+ unsigned char *out, const unsigned char *in)
+{
+ _gcry_camellia_aarch64ce_decrypt_blk16 (ctx->keytable, out, in,
+ ctx->keybitlength / 8);
+}
+
+static const int aarch64ce_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + 16 +
+ 2 * sizeof(void *);
+#endif /*USE_AARCH64_CE*/
+
static const char *selftest(void);
static void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr,
@@ -549,6 +592,9 @@ camellia_setkey(void *c, const byte *key, unsigned keylen,
ctx->use_ppc9 = (hwf & HWF_PPC_VCRYPTO) && (hwf & HWF_PPC_ARCH_3_00);
ctx->use_ppc = ctx->use_ppc8 || ctx->use_ppc9;
#endif
+#ifdef USE_AARCH64_CE
+ ctx->use_aarch64ce = (hwf & HWF_ARM_AES) != 0;
+#endif
ctx->keybitlength=keylen*8;
@@ -575,6 +621,10 @@ camellia_setkey(void *c, const byte *key, unsigned keylen,
else if (ctx->use_ppc8)
_gcry_camellia_ppc8_keygen(ctx->keytable, key, keylen);
#endif
+#ifdef USE_AARCH64_CE
+ else if (ctx->use_aarch64ce)
+ _gcry_camellia_aarch64ce_keygen(ctx->keytable, key, keylen);
+#endif
else
{
Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable);
@@ -754,6 +804,16 @@ camellia_encrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
num_blks -= 16;
}
#endif
+#ifdef USE_AARCH64_CE
+ while (ctx->use_aarch64ce && num_blks >= 16)
+ {
+ camellia_aarch64ce_enc_blk16 (ctx, outbuf, inbuf);
+ stack_burn_size = aarch64ce_burn_stack_depth;
+ outbuf += CAMELLIA_BLOCK_SIZE * 16;
+ inbuf += CAMELLIA_BLOCK_SIZE * 16;
+ num_blks -= 16;
+ }
+#endif
while (num_blks)
{
@@ -855,6 +915,16 @@ camellia_decrypt_blk1_32 (void *priv, byte *outbuf, const byte *inbuf,
num_blks -= 16;
}
#endif
+#ifdef USE_AARCH64_CE
+ while (ctx->use_aarch64ce && num_blks >= 16)
+ {
+ camellia_aarch64ce_dec_blk16 (ctx, outbuf, inbuf);
+ stack_burn_size = aarch64ce_burn_stack_depth;
+ outbuf += CAMELLIA_BLOCK_SIZE * 16;
+ inbuf += CAMELLIA_BLOCK_SIZE * 16;
+ num_blks -= 16;
+ }
+#endif
while (num_blks)
{