Diffstat (limited to 'cipher/aria.c')
-rw-r--r--  cipher/aria.c | 299
1 file changed, 274 insertions, 25 deletions
diff --git a/cipher/aria.c b/cipher/aria.c
index 700ea409..18952d04 100644
--- a/cipher/aria.c
+++ b/cipher/aria.c
@@ -50,6 +50,60 @@
#endif
+/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
+#undef USE_AESNI_AVX
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AESNI_AVX 1
+# endif
+#endif
+
+/* USE_GFNI_AVX indicates whether to compile with Intel GFNI/AVX code. */
+#undef USE_GFNI_AVX
+#if defined(USE_AESNI_AVX) && defined(ENABLE_GFNI_SUPPORT)
+# define USE_GFNI_AVX 1
+#endif
+
+/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
+#undef USE_AESNI_AVX2
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AESNI_AVX2 1
+# endif
+#endif
+
+/* USE_GFNI_AVX2 indicates whether to compile with Intel GFNI/AVX2 code. */
+#undef USE_GFNI_AVX2
+#if defined(USE_AESNI_AVX2) && defined(ENABLE_GFNI_SUPPORT)
+# define USE_GFNI_AVX2 1
+#endif
+
+/* How many parallel blocks to handle in bulk processing functions. */
+#if defined(USE_AESNI_AVX2)
+# define MAX_PARALLEL_BLKS 32
+#elif defined(USE_AESNI_AVX)
+# define MAX_PARALLEL_BLKS 16
+#else
+# define MAX_PARALLEL_BLKS 8
+#endif
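+
+/* MAX_PARALLEL_BLKS also sizes the on-stack temporary buffers used by the
+ * bulk mode handlers further below. */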
+
+/* Assembly implementations use the SystemV ABI; on Win64, an ABI conversion
+ * and extra stack space to store the ten XMM registers XMM6-XMM15 are
+ * needed. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+# define ASM_FUNC_ABI __attribute__((sysv_abi))
+# define ASM_EXTRA_STACK (10 * 16)
+# else
+# define ASM_FUNC_ABI
+# define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+
static const char *aria_selftest (void);
@@ -69,6 +123,15 @@ typedef struct
unsigned int decryption_prepared:1; /* The decryption key is set up. */
unsigned int bulk_prefetch_ready:1; /* Look-up table prefetch ready for
* current bulk operation. */
+
+#ifdef USE_AESNI_AVX
+ unsigned int use_aesni_avx:1;
+ unsigned int use_gfni_avx:1;
+#endif
+#ifdef USE_AESNI_AVX2
+ unsigned int use_aesni_avx2:1;
+ unsigned int use_gfni_avx2:1;
+#endif
} ARIA_context;
@@ -363,6 +426,102 @@ static struct
0
};
+#ifdef USE_AESNI_AVX
+extern unsigned int
+_gcry_aria_aesni_avx_ecb_crypt_blk1_16(const void *ctx, byte *out,
+ const byte *in, const void *key,
+ u64 nblks) ASM_FUNC_ABI;
+extern unsigned int
+_gcry_aria_aesni_avx_ctr_crypt_blk16(const void *ctx, byte *out,
+ const byte *in, byte *iv) ASM_FUNC_ABI;
+
+#ifdef USE_GFNI_AVX
+extern unsigned int
+_gcry_aria_gfni_avx_ecb_crypt_blk1_16(const void *ctx, byte *out,
+ const byte *in, const void *key,
+ u64 nblks) ASM_FUNC_ABI;
+extern unsigned int
+_gcry_aria_gfni_avx_ctr_crypt_blk16(const void *ctx, byte *out,
+ const byte *in, byte *iv) ASM_FUNC_ABI;
+#endif /* USE_GFNI_AVX */
+
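+/* Run-time dispatch: prefer the GFNI variant when the context enables it,
+ * and include the Win64 ABI-wrapper stack in the reported burn depth. */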
+static inline unsigned int
+aria_avx_ecb_crypt_blk1_16(const ARIA_context *ctx, byte *out, const byte *in,
+ const u32 key[][ARIA_RD_KEY_WORDS], size_t nblks)
+{
+#ifdef USE_GFNI_AVX
+ if (ctx->use_gfni_avx)
+ return _gcry_aria_gfni_avx_ecb_crypt_blk1_16(ctx, out, in, key, nblks)
+ + ASM_EXTRA_STACK;
+ else
+#endif /* USE_GFNI_AVX */
+ return _gcry_aria_aesni_avx_ecb_crypt_blk1_16(ctx, out, in, key, nblks)
+ + ASM_EXTRA_STACK;
+}
+
+static inline unsigned int
+aria_avx_ctr_crypt_blk16(const ARIA_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef USE_GFNI_AVX
+ if (ctx->use_gfni_avx)
+ return _gcry_aria_gfni_avx_ctr_crypt_blk16(ctx, out, in, iv)
+ + ASM_EXTRA_STACK;
+ else
+#endif /* USE_GFNI_AVX */
+ return _gcry_aria_aesni_avx_ctr_crypt_blk16(ctx, out, in, iv)
+ + ASM_EXTRA_STACK;
+}
+#endif /* USE_AESNI_AVX */
+
+#ifdef USE_AESNI_AVX2
+extern unsigned int
+_gcry_aria_aesni_avx2_ecb_crypt_blk32(const void *ctx, byte *out,
+ const byte *in,
+ const void *key) ASM_FUNC_ABI;
+extern unsigned int
+_gcry_aria_aesni_avx2_ctr_crypt_blk32(const void *ctx, byte *out,
+ const byte *in, byte *iv) ASM_FUNC_ABI;
+
+#ifdef USE_GFNI_AVX2
+extern unsigned int
+_gcry_aria_gfni_avx2_ecb_crypt_blk32(const void *ctx, byte *out,
+ const byte *in,
+ const void *key) ASM_FUNC_ABI;
+extern unsigned int
+_gcry_aria_gfni_avx2_ctr_crypt_blk32(const void *ctx, byte *out,
+ const byte *in, byte *iv) ASM_FUNC_ABI;
+#endif /* USE_GFNI_AVX2 */
+
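+/* As above: run-time dispatch between the GFNI/AVX2 and AES-NI/AVX2
+ * variants. */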
+static inline unsigned int
+aria_avx2_ecb_crypt_blk32(const ARIA_context *ctx, byte *out, const byte *in,
+ const u32 key[][ARIA_RD_KEY_WORDS])
+{
+#ifdef USE_GFNI_AVX2
+ if (ctx->use_gfni_avx2)
+ return _gcry_aria_gfni_avx2_ecb_crypt_blk32(ctx, out, in, key)
+ + ASM_EXTRA_STACK;
+ else
+#endif /* USE_GFNI_AVX2 */
+ return _gcry_aria_aesni_avx2_ecb_crypt_blk32(ctx, out, in, key)
+ + ASM_EXTRA_STACK;
+}
+
+static inline unsigned int
+aria_avx2_ctr_crypt_blk32(const ARIA_context *ctx, byte *out, const byte *in,
+ byte *iv)
+{
+#ifdef USE_GFNI_AVX2
+ if (ctx->use_gfni_avx2)
+ return _gcry_aria_gfni_avx2_ctr_crypt_blk32(ctx, out, in, iv)
+ + ASM_EXTRA_STACK;
+ else
+#endif /* USE_GFNI_AVX2 */
+ return _gcry_aria_aesni_avx2_ctr_crypt_blk32(ctx, out, in, iv)
+ + ASM_EXTRA_STACK;
+}
+#endif /* USE_AESNI_AVX2 */
+
/* Prefetching for sbox tables. */
static inline void
prefetch_table(const volatile byte *tab, size_t len)
@@ -864,7 +1023,47 @@ aria_crypt_blocks (ARIA_context *ctx, byte *out, const byte *in,
size_t num_blks, u32 key[][ARIA_RD_KEY_WORDS])
{
unsigned int burn_depth = 0;
- unsigned int nburn;
+
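+  /* Try the widest implementation first: 32-block AVX2 groups, then
+   * 3..16-block AVX runs, then the generic 2-block/1-block code below. */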
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2 || ctx->use_gfni_avx2)
+ {
+ unsigned int nburn = 0;
+
+ while (num_blks >= 32)
+ {
+ nburn = aria_avx2_ecb_crypt_blk32 (ctx, out, in, key);
+ in += 32 * ARIA_BLOCK_SIZE;
+ out += 32 * ARIA_BLOCK_SIZE;
+ num_blks -= 32;
+ }
+
+ burn_depth = nburn > burn_depth ? nburn : burn_depth;
+
+ if (num_blks == 0)
+ return burn_depth;
+ }
+#endif /* USE_AESNI_AVX2 */
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx || ctx->use_gfni_avx)
+ {
+ unsigned int nburn = 0;
+
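+      /* The 1..16-block AVX path is entered for runs of 3 or more blocks;
+       * 1 or 2 leftover blocks fall through to the generic code below. */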
+ while (num_blks >= 3)
+ {
+ size_t curr_blks = num_blks < 16 ? num_blks : 16;
+ nburn = aria_avx_ecb_crypt_blk1_16 (ctx, out, in, key, curr_blks);
+ in += curr_blks * ARIA_BLOCK_SIZE;
+ out += curr_blks * ARIA_BLOCK_SIZE;
+ num_blks -= curr_blks;
+ }
+
+ burn_depth = nburn > burn_depth ? nburn : burn_depth;
+
+ if (num_blks == 0)
+ return burn_depth;
+ }
+#endif /* USE_AESNI_AVX */
if (!ctx->bulk_prefetch_ready)
{
@@ -874,19 +1073,19 @@ aria_crypt_blocks (ARIA_context *ctx, byte *out, const byte *in,
while (num_blks >= 2)
{
- nburn = aria_crypt_2blks (ctx, out, in, key);
+ unsigned int nburn = aria_crypt_2blks (ctx, out, in, key);
burn_depth = nburn > burn_depth ? nburn : burn_depth;
- out += 2 * 16;
- in += 2 * 16;
+ out += 2 * ARIA_BLOCK_SIZE;
+ in += 2 * ARIA_BLOCK_SIZE;
num_blks -= 2;
}
while (num_blks)
{
- nburn = aria_crypt (ctx, out, in, key);
+ unsigned int nburn = aria_crypt (ctx, out, in, key);
burn_depth = nburn > burn_depth ? nburn : burn_depth;
- out += 16;
- in += 16;
+ out += ARIA_BLOCK_SIZE;
+ in += ARIA_BLOCK_SIZE;
num_blks--;
}
@@ -925,12 +1124,46 @@ _gcry_aria_ctr_enc(void *context, unsigned char *ctr,
const byte *inbuf = inbuf_arg;
int burn_stack_depth = 0;
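+  /* Consume 32-block groups with AVX2 first, then 16-block groups with
+   * AVX; any remainder is handled by the generic bulk helper below. */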
+#ifdef USE_AESNI_AVX2
+ if (ctx->use_aesni_avx2 || ctx->use_gfni_avx2)
+ {
+ size_t nburn = 0;
+
+ while (nblocks >= 32)
+ {
+ nburn = aria_avx2_ctr_crypt_blk32 (ctx, outbuf, inbuf, ctr);
+ inbuf += 32 * ARIA_BLOCK_SIZE;
+ outbuf += 32 * ARIA_BLOCK_SIZE;
+ nblocks -= 32;
+ }
+
+ burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
+ }
+#endif /* USE_AESNI_AVX2 */
+
+#ifdef USE_AESNI_AVX
+ if (ctx->use_aesni_avx || ctx->use_gfni_avx)
+ {
+ size_t nburn = 0;
+
+ while (nblocks >= 16)
+ {
+ nburn = aria_avx_ctr_crypt_blk16 (ctx, outbuf, inbuf, ctr);
+ inbuf += 16 * ARIA_BLOCK_SIZE;
+ outbuf += 16 * ARIA_BLOCK_SIZE;
+ nblocks -= 16;
+ }
+
+ burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
+ }
+#endif /* USE_AESNI_AVX */
+
/* Process remaining blocks. */
if (nblocks)
{
- byte tmpbuf[16 * ARIA_BLOCK_SIZE];
+ byte tmpbuf[MAX_PARALLEL_BLKS * ARIA_BLOCK_SIZE];
unsigned int tmp_used = ARIA_BLOCK_SIZE;
- size_t nburn;
+ size_t nburn = 0;
ctx->bulk_prefetch_ready = 0;
@@ -1002,7 +1235,7 @@ _gcry_aria_cbc_dec(void *context, unsigned char *iv,
/* Process remaining blocks. */
if (nblocks)
{
- unsigned char tmpbuf[16 * ARIA_BLOCK_SIZE];
+ unsigned char tmpbuf[MAX_PARALLEL_BLKS * ARIA_BLOCK_SIZE];
unsigned int tmp_used = ARIA_BLOCK_SIZE;
size_t nburn;
@@ -1062,7 +1295,7 @@ _gcry_aria_cfb_dec(void *context, unsigned char *iv,
/* Process remaining blocks. */
if (nblocks)
{
- unsigned char tmpbuf[16 * ARIA_BLOCK_SIZE];
+ unsigned char tmpbuf[MAX_PARALLEL_BLKS * ARIA_BLOCK_SIZE];
unsigned int tmp_used = ARIA_BLOCK_SIZE;
size_t nburn;
@@ -1099,14 +1332,14 @@ _gcry_aria_ecb_crypt (void *context, void *outbuf_arg,
/* Process remaining blocks. */
if (nblocks)
{
- bulk_crypt_fn_t crypt_blk1_16;
+ bulk_crypt_fn_t crypt_blk1_n;
size_t nburn;
ctx->bulk_prefetch_ready = 0;
- crypt_blk1_16 = encrypt ? aria_enc_blocks : aria_dec_blocks;
+ crypt_blk1_n = encrypt ? aria_enc_blocks : aria_dec_blocks;
- nburn = bulk_ecb_crypt_128(ctx, crypt_blk1_16,
- outbuf, inbuf, nblocks, 16);
+ nburn = bulk_ecb_crypt_128(ctx, crypt_blk1_n,
+ outbuf, inbuf, nblocks, MAX_PARALLEL_BLKS);
burn_stack_depth = nburn > burn_stack_depth ? nburn : burn_stack_depth;
}
@@ -1133,15 +1366,15 @@ _gcry_aria_xts_crypt (void *context, unsigned char *tweak, void *outbuf_arg,
/* Process remaining blocks. */
if (nblocks)
{
- unsigned char tmpbuf[16 * ARIA_BLOCK_SIZE];
+ unsigned char tmpbuf[MAX_PARALLEL_BLKS * ARIA_BLOCK_SIZE];
unsigned int tmp_used = ARIA_BLOCK_SIZE;
- bulk_crypt_fn_t crypt_blk1_16;
+ bulk_crypt_fn_t crypt_blk1_n;
size_t nburn;
ctx->bulk_prefetch_ready = 0;
- crypt_blk1_16 = encrypt ? aria_enc_blocks : aria_dec_blocks;
+ crypt_blk1_n = encrypt ? aria_enc_blocks : aria_dec_blocks;
- nburn = bulk_xts_crypt_128(ctx, crypt_blk1_16,
+ nburn = bulk_xts_crypt_128(ctx, crypt_blk1_n,
outbuf, inbuf, nblocks,
tweak, tmpbuf,
sizeof(tmpbuf) / ARIA_BLOCK_SIZE,
@@ -1169,7 +1402,7 @@ _gcry_aria_ctr32le_enc(void *context, unsigned char *ctr,
/* Process remaining blocks. */
if (nblocks)
{
- unsigned char tmpbuf[16 * ARIA_BLOCK_SIZE];
+ unsigned char tmpbuf[MAX_PARALLEL_BLKS * ARIA_BLOCK_SIZE];
unsigned int tmp_used = ARIA_BLOCK_SIZE;
size_t nburn;
@@ -1208,15 +1441,15 @@ _gcry_aria_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
/* Process remaining blocks. */
if (nblocks)
{
- unsigned char tmpbuf[16 * ARIA_BLOCK_SIZE];
+ unsigned char tmpbuf[MAX_PARALLEL_BLKS * ARIA_BLOCK_SIZE];
unsigned int tmp_used = ARIA_BLOCK_SIZE;
- bulk_crypt_fn_t crypt_blk1_16;
+ bulk_crypt_fn_t crypt_blk1_n;
size_t nburn;
ctx->bulk_prefetch_ready = 0;
- crypt_blk1_16 = encrypt ? aria_enc_blocks : aria_dec_blocks;
+ crypt_blk1_n = encrypt ? aria_enc_blocks : aria_dec_blocks;
- nburn = bulk_ocb_crypt_128 (c, ctx, crypt_blk1_16, outbuf, inbuf, nblocks,
+ nburn = bulk_ocb_crypt_128 (c, ctx, crypt_blk1_n, outbuf, inbuf, nblocks,
&blkn, encrypt, tmpbuf,
sizeof(tmpbuf) / ARIA_BLOCK_SIZE,
&tmp_used);
@@ -1245,7 +1478,7 @@ _gcry_aria_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks)
/* Process remaining blocks. */
if (nblocks)
{
- unsigned char tmpbuf[16 * ARIA_BLOCK_SIZE];
+ unsigned char tmpbuf[MAX_PARALLEL_BLKS * ARIA_BLOCK_SIZE];
unsigned int tmp_used = ARIA_BLOCK_SIZE;
size_t nburn;
@@ -1275,6 +1508,9 @@ aria_setkey(void *c, const byte *key, unsigned keylen,
ARIA_context *ctx = c;
static int initialized = 0;
static const char *selftest_failed = NULL;
+ unsigned int hwf = _gcry_get_hw_features ();
+
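+  /* HWF flags are only consulted by the USE_* paths below; the cast keeps
+   * the variable referenced when none of them are compiled in. */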
+ (void)hwf;
if (keylen != 16 && keylen != 24 && keylen != 32)
return GPG_ERR_INV_KEYLEN;
@@ -1290,6 +1526,19 @@ aria_setkey(void *c, const byte *key, unsigned keylen,
if (selftest_failed)
return GPG_ERR_SELFTEST_FAILED;
+#ifdef USE_AESNI_AVX2
+ ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2);
+#endif
+#ifdef USE_GFNI_AVX2
+ ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2);
+#endif
+#ifdef USE_AESNI_AVX
+ ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
+#endif
+#ifdef USE_GFNI_AVX
+ ctx->use_gfni_avx = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX);
+#endif
+
/* Setup bulk encryption routines. */
memset (bulk_ops, 0, sizeof(*bulk_ops));
bulk_ops->cbc_enc = _gcry_aria_cbc_enc;
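
For reference, the new bulk paths are reached through the ordinary libgcrypt
cipher API; callers need nothing ARIA-specific. Below is a minimal,
illustrative sketch (assuming a libgcrypt build that ships this ARIA code, so
that GCRY_CIPHER_ARIA128 is available) which encrypts 64 CTR-mode blocks in
place, enough for _gcry_aria_ctr_enc to take the 32-block AVX2 path twice on
capable hardware:

    /* Illustrative only; build with: cc aria-ctr-demo.c -lgcrypt */
    #include <stdio.h>
    #include <string.h>
    #include <gcrypt.h>

    int
    main (void)
    {
      gcry_cipher_hd_t hd = NULL;
      unsigned char key[16] = { 0 };   /* demo key, all zero */
      unsigned char ctr[16] = { 0 };   /* initial counter block */
      unsigned char buf[64 * 16];      /* 64 blocks = two 32-block AVX2 groups */
      gcry_error_t err;

      if (!gcry_check_version (NULL))
        return 1;
      gcry_control (GCRYCTL_INITIALIZATION_FINISHED, 0);

      memset (buf, 0xaa, sizeof buf);

      err = gcry_cipher_open (&hd, GCRY_CIPHER_ARIA128, GCRY_CIPHER_MODE_CTR, 0);
      if (!err)
        err = gcry_cipher_setkey (hd, key, sizeof key);
      if (!err)
        err = gcry_cipher_setctr (hd, ctr, sizeof ctr);
      if (!err)
        /* In-place encryption; with AVX2 available this consumes the buffer
           in 32-block groups before falling back to the generic tail code. */
        err = gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0);

      if (err)
        fprintf (stderr, "ARIA-CTR failed: %s\n", gcry_strerror (err));
      if (hd)
        gcry_cipher_close (hd);
      return err ? 1 : 0;
    }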