-rw-r--r--  cipher/aria-aesni-avx2-amd64.S | 368
-rw-r--r--  cipher/aria.c                  |  50
2 files changed, 409 insertions, 9 deletions
diff --git a/cipher/aria-aesni-avx2-amd64.S b/cipher/aria-aesni-avx2-amd64.S
index 0a89b0bf..d33fa54b 100644
--- a/cipher/aria-aesni-avx2-amd64.S
+++ b/cipher/aria-aesni-avx2-amd64.S
@@ -31,6 +31,9 @@
#ifdef ENABLE_GFNI_SUPPORT
# define CONFIG_AS_GFNI 1
#endif
+#ifdef HAVE_GCC_INLINE_ASM_VAES_VPCLMUL
+# define CONFIG_AS_VAES 1
+#endif
/* struct ARIA_context: */
#define ARIA_BLOCK_SIZE 16
@@ -358,6 +361,53 @@
vgf2p8affineinvqb $0, t2, x7, x7
#endif /* CONFIG_AS_GFNI */
+#ifdef CONFIG_AS_VAES
+#define aria_sbox_8way_vaes(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ t0, t1, t2, t3, \
+ t4, t5, t6, t7) \
+ vpxor t7, t7, t7; \
+ vpxor t6, t6, t6; \
+ vbroadcasti128 .Linv_shift_row rRIP, t0; \
+ vbroadcasti128 .Lshift_row rRIP, t1; \
+ vbroadcasti128 .Ltf_lo__inv_aff__and__s2 rRIP, t2;\
+ vbroadcasti128 .Ltf_hi__inv_aff__and__s2 rRIP, t3;\
+ vbroadcasti128 .Ltf_lo__x2__and__fwd_aff rRIP, t4;\
+ vbroadcasti128 .Ltf_hi__x2__and__fwd_aff rRIP, t5;\
+ \
+ vaesenclast t7, x0, x0; \
+ vaesenclast t7, x4, x4; \
+ vaesenclast t7, x1, x1; \
+ vaesenclast t7, x5, x5; \
+ vaesdeclast t7, x2, x2; \
+ vaesdeclast t7, x6, x6; \
+ \
+ vpbroadcastd .L0f0f0f0f rRIP, t6; \
+ \
+ /* AES inverse shift rows */ \
+ vpshufb t0, x0, x0; \
+ vpshufb t0, x4, x4; \
+ vpshufb t0, x1, x1; \
+ vpshufb t0, x5, x5; \
+ vpshufb t1, x3, x3; \
+ vpshufb t1, x7, x7; \
+ vpshufb t1, x2, x2; \
+ vpshufb t1, x6, x6; \
+ \
+ /* affine transformation for S2 */ \
+ filter_8bit(x1, t2, t3, t6, t0); \
+ /* affine transformation for S2 */ \
+ filter_8bit(x5, t2, t3, t6, t0); \
+ \
+ /* affine transformation for X2 */ \
+ filter_8bit(x3, t4, t5, t6, t0); \
+ /* affine transformation for X2 */ \
+ filter_8bit(x7, t4, t5, t6, t0); \
+ \
+ vaesdeclast t7, x3, x3; \
+ vaesdeclast t7, x7, x7;
+#endif /* CONFIG_AS_VAES */
+
#define aria_sbox_8way(x0, x1, x2, x3, \
x4, x5, x6, x7, \
t0, t1, t2, t3, \
@@ -432,7 +482,7 @@
vextracti128 $1, x7, t6##_x; \
vaesdeclast t7##_x, x7##_x, x7##_x; \
vaesdeclast t7##_x, t6##_x, t6##_x; \
- vinserti128 $1, t6##_x, x7, x7; \
+ vinserti128 $1, t6##_x, x7, x7;
#define aria_diff_m(x0, x1, x2, x3, \
t0, t1, t2, t3) \
@@ -630,6 +680,7 @@
aria_load_state_8way(y0, y1, y2, y3, \
y4, y5, y6, y7, \
mem_tmp, 8);
+
#ifdef CONFIG_AS_GFNI
#define aria_fe_gfni(x0, x1, x2, x3, \
x4, x5, x6, x7, \
@@ -786,6 +837,155 @@
mem_tmp, 8);
#endif /* CONFIG_AS_GFNI */
+#ifdef CONFIG_AS_VAES
+#define aria_fe_vaes(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way_vaes(x2, x3, x0, x1, x6, x7, x4, \
+ x5, y0, y1, y2, y3, y4, y5, \
+ y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way_vaes(x2, x3, x0, x1, x6, x7, x4, \
+ x5, y0, y1, y2, y3, y4, y5, \
+ y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T3 = ABCD -> BADC \
+ * T3 = y4, y5, y6, y7 -> y5, y4, y7, y6 \
+ * T0 = ABCD -> CDAB \
+ * T0 = x0, x1, x2, x3 -> x2, x3, x0, x1 \
+ * T1 = ABCD -> DCBA \
+ * T1 = x4, x5, x6, x7 -> x7, x6, x5, x4 \
+ */ \
+ aria_diff_word(x2, x3, x0, x1, \
+ x7, x6, x5, x4, \
+ y0, y1, y2, y3, \
+ y5, y4, y7, y6); \
+ aria_store_state_8way(x3, x2, x1, x0, \
+ x6, x7, x4, x5, \
+ mem_tmp, 0);
+
+#define aria_fo_vaes(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way_vaes(x0, x1, x2, x3, x4, x5, x6, \
+ x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way_vaes(x0, x1, x2, x3, x4, x5, x6, \
+ x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7); \
+ \
+ aria_diff_m(x0, x1, x2, x3, y0, y1, y2, y3); \
+ aria_diff_m(x4, x5, x6, x7, y0, y1, y2, y3); \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8); \
+ aria_diff_word(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7); \
+ /* aria_diff_byte() \
+ * T1 = ABCD -> BADC \
+ * T1 = x4, x5, x6, x7 -> x5, x4, x7, x6 \
+ * T2 = ABCD -> CDAB \
+ * T2 = y0, y1, y2, y3, -> y2, y3, y0, y1 \
+ * T3 = ABCD -> DCBA \
+ * T3 = y4, y5, y6, y7 -> y7, y6, y5, y4 \
+ */ \
+ aria_diff_word(x0, x1, x2, x3, \
+ x5, x4, x7, x6, \
+ y2, y3, y0, y1, \
+ y7, y6, y5, y4); \
+ aria_store_state_8way(x3, x2, x1, x0, \
+ x6, x7, x4, x5, \
+ mem_tmp, 0);
+
+#define aria_ff_vaes(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, rk, round, last_round) \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, round); \
+ \
+ aria_sbox_8way_vaes(x2, x3, x0, x1, x6, x7, x4, \
+ x5, y0, y1, y2, y3, y4, y5, \
+ y6, y7); \
+ \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 8, last_round); \
+ \
+ aria_store_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 8); \
+ \
+ aria_load_state_8way(x0, x1, x2, x3, \
+ x4, x5, x6, x7, \
+ mem_tmp, 0); \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, round); \
+ \
+ aria_sbox_8way_vaes(x2, x3, x0, x1, x6, x7, x4, \
+ x5, y0, y1, y2, y3, y4, y5, \
+ y6, y7); \
+ \
+ aria_ark_8way(x0, x1, x2, x3, x4, x5, x6, x7, \
+ y0, rk, 0, last_round); \
+ \
+ aria_load_state_8way(y0, y1, y2, y3, \
+ y4, y5, y6, y7, \
+ mem_tmp, 8);
+#endif /* CONFIG_AS_VAES */
SECTION_RODATA
.align 32
@@ -1294,6 +1494,172 @@ _gcry_aria_aesni_avx2_ctr_crypt_blk32:
ELF(.size _gcry_aria_aesni_avx2_ctr_crypt_blk32,
.-_gcry_aria_aesni_avx2_ctr_crypt_blk32;)
+#ifdef CONFIG_AS_VAES
+.align 16
+ELF(.type __aria_vaes_avx2_crypt_32way,@function;)
+__aria_vaes_avx2_crypt_32way:
+ /* input:
+ * %r9: rk
+ * %rsi: dst
+ * %rdx: src
+ * %ymm0..%ymm15: byte-sliced blocks
+ */
+ CFI_STARTPROC();
+
+ movq %rsi, %rax;
+ leaq 8 * 32(%rax), %r8;
+
+ movl ARIA_CTX_rounds(CTX), %r10d;
+ subl $2, %r10d;
+
+ inpack16_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rax, %r8);
+ aria_fo_vaes(%ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 0);
+ leaq 1*16(%r9), %r9;
+
+.align 16
+.Loop_vaes:
+ aria_fe_vaes(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %rax, %r9, 0);
+ aria_fo_vaes(%ymm9, %ymm8, %ymm11, %ymm10,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %ymm0, %ymm1, %ymm2, %ymm3,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %rax, %r9, 1);
+ leaq 2*16(%r9), %r9;
+ subl $2, %r10d;
+ jnz .Loop_vaes;
+
+ aria_ff_vaes(%ymm1, %ymm0, %ymm3, %ymm2,
+ %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11,
+ %ymm12, %ymm13, %ymm14, %ymm15,
+ %rax, %r9, 0, 1);
+
+ debyteslice_16x16b(%ymm8, %ymm12, %ymm1, %ymm4,
+ %ymm9, %ymm13, %ymm0, %ymm5,
+ %ymm10, %ymm14, %ymm3, %ymm6,
+ %ymm11, %ymm15, %ymm2, %ymm7,
+ (%rax), (%r8));
+
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size __aria_vaes_avx2_crypt_32way,.-__aria_vaes_avx2_crypt_32way;)
+
+.align 16
+.globl _gcry_aria_vaes_avx2_ecb_crypt_blk32
+ELF(.type _gcry_aria_vaes_avx2_ecb_crypt_blk32,@function;)
+_gcry_aria_vaes_avx2_ecb_crypt_blk32:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: round keys
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ subq $(16 * 32), %rsp;
+ andq $~31, %rsp;
+
+ movq %rcx, %r9;
+ movq %rsi, %r11;
+ movq %rsp, %rsi; /* use stack for temporary store */
+
+ inpack16_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rdx);
+
+ call __aria_vaes_avx2_crypt_32way;
+
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %r11);
+
+ movl $STACK_DEPTH, %eax;
+ leave;
+ CFI_LEAVE();
+ vzeroall;
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size _gcry_aria_vaes_avx2_ecb_crypt_blk32,
+ .-_gcry_aria_vaes_avx2_ecb_crypt_blk32;)
+
+.align 16
+.globl _gcry_aria_vaes_avx2_ctr_crypt_blk32
+ELF(.type _gcry_aria_vaes_avx2_ctr_crypt_blk32,@function;)
+_gcry_aria_vaes_avx2_ctr_crypt_blk32:
+ /* input:
+ * %rdi: ctx
+ * %rsi: dst
+ * %rdx: src
+ * %rcx: iv (big endian, 128bit)
+ */
+ CFI_STARTPROC();
+
+ pushq %rbp;
+ CFI_PUSH(%rbp);
+ movq %rsp, %rbp;
+ CFI_DEF_CFA_REGISTER(%rbp);
+
+ subq $(16 * 32), %rsp;
+ andq $~31, %rsp;
+
+ movq %rcx, %r8; /* %r8: iv */
+ movq %rsp, %rcx; /* %rcx: keystream */
+ call __aria_aesni_avx2_ctr_gen_keystream_32way;
+
+ pushq %rsi;
+ movq %rdx, %r11;
+ movq %rcx, %rsi; /* use stack for temporary store */
+ movq %rcx, %rdx;
+ leaq ARIA_CTX_enc_key(CTX), %r9;
+
+ call __aria_vaes_avx2_crypt_32way;
+
+ popq %rsi;
+ vpxor (0 * 32)(%r11), %ymm1, %ymm1;
+ vpxor (1 * 32)(%r11), %ymm0, %ymm0;
+ vpxor (2 * 32)(%r11), %ymm3, %ymm3;
+ vpxor (3 * 32)(%r11), %ymm2, %ymm2;
+ vpxor (4 * 32)(%r11), %ymm4, %ymm4;
+ vpxor (5 * 32)(%r11), %ymm5, %ymm5;
+ vpxor (6 * 32)(%r11), %ymm6, %ymm6;
+ vpxor (7 * 32)(%r11), %ymm7, %ymm7;
+ vpxor (8 * 32)(%r11), %ymm8, %ymm8;
+ vpxor (9 * 32)(%r11), %ymm9, %ymm9;
+ vpxor (10 * 32)(%r11), %ymm10, %ymm10;
+ vpxor (11 * 32)(%r11), %ymm11, %ymm11;
+ vpxor (12 * 32)(%r11), %ymm12, %ymm12;
+ vpxor (13 * 32)(%r11), %ymm13, %ymm13;
+ vpxor (14 * 32)(%r11), %ymm14, %ymm14;
+ vpxor (15 * 32)(%r11), %ymm15, %ymm15;
+ write_output(%ymm1, %ymm0, %ymm3, %ymm2, %ymm4, %ymm5, %ymm6, %ymm7,
+ %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14,
+ %ymm15, %rsi);
+
+ movl $STACK_DEPTH, %eax;
+ leave;
+ CFI_LEAVE();
+ vzeroall;
+ ret_spec_stop;
+ CFI_ENDPROC();
+ELF(.size _gcry_aria_vaes_avx2_ctr_crypt_blk32,
+ .-_gcry_aria_vaes_avx2_ctr_crypt_blk32;)
+#endif /* CONFIG_AS_VAES */
+
#ifdef CONFIG_AS_GFNI
.align 16
ELF(.type __aria_gfni_avx2_crypt_32way,@function;)
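
The core of the assembly change is aria_sbox_8way_vaes. With plain AES-NI under AVX2, vaesenclast/vaesdeclast exist only for 128-bit xmm registers, so the existing aria_sbox_8way has to split every ymm register with vextracti128/vinserti128 (visible in the aria_sbox_8way hunk above); VAES provides the same instructions on full 256-bit ymm registers. Because t7 is zeroed first, the AES last-round instructions reduce to plain SubBytes/InvSubBytes plus a row shift, which the .Linv_shift_row/.Lshift_row shuffles then compensate for before the filter_8bit affine steps derive ARIA's S-boxes. A minimal intrinsics sketch of the lane-splitting difference follows; it is illustrative only, not code from this patch, and assumes a build along the lines of gcc -mavx2 -maes -mvaes.

#include <immintrin.h>

/* AES-NI + AVX2 only: the AES round primitive is 128-bit, so each ymm
   register is processed one lane at a time, matching the
   vextracti128/vaesenclast/vinserti128 sequence in aria_sbox_8way. */
__m256i
aesenclast_ymm_aesni (__m256i state, __m128i zero_key)
{
  __m128i lo = _mm256_castsi256_si128 (state);
  __m128i hi = _mm256_extracti128_si256 (state, 1);

  lo = _mm_aesenclast_si128 (lo, zero_key);
  hi = _mm_aesenclast_si128 (hi, zero_key);
  return _mm256_inserti128_si256 (_mm256_castsi128_si256 (lo), hi, 1);
}

/* VAES: one instruction covers both 128-bit lanes, which is what lets
   aria_sbox_8way_vaes drop the extract/insert pairs entirely. */
__m256i
aesenclast_ymm_vaes (__m256i state, __m256i zero_key)
{
  return _mm256_aesenclast_epi128 (state, zero_key);
}
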
diff --git a/cipher/aria.c b/cipher/aria.c
index 9eb42a2d..bc2d4384 100644
--- a/cipher/aria.c
+++ b/cipher/aria.c
@@ -74,6 +74,12 @@
# endif
#endif
+/* USE_VAES_AVX2 indicates whether to compile with Intel VAES/AVX2 code. */
+#undef USE_VAES_AVX2
+#if defined(USE_AESNI_AVX2) && defined(HAVE_GCC_INLINE_ASM_VAES_VPCLMUL)
+# define USE_VAES_AVX2 1
+#endif
+
/* USE_GFNI_AVX2 indicates whether to compile with Intel GFNI/AVX2 code. */
#undef USE_GFNI_AVX2
#if defined(USE_AESNI_AVX2) && defined(ENABLE_GFNI_SUPPORT)
@@ -142,6 +148,7 @@ typedef struct
#endif
#ifdef USE_AESNI_AVX2
unsigned int use_aesni_avx2:1;
+ unsigned int use_vaes_avx2:1;
unsigned int use_gfni_avx2:1;
#endif
#ifdef USE_GFNI_AVX512
@@ -464,12 +471,13 @@ static inline unsigned int
aria_avx_ecb_crypt_blk1_16(const ARIA_context *ctx, byte *out, const byte *in,
const u32 key[][ARIA_RD_KEY_WORDS], size_t nblks)
{
+ if (0) { }
#ifdef USE_GFNI_AVX
- if (ctx->use_gfni_avx)
+ else if (ctx->use_gfni_avx)
return _gcry_aria_gfni_avx_ecb_crypt_blk1_16(ctx, out, in, key, nblks)
+ ASM_EXTRA_STACK;
- else
#endif /* USE_GFNI_AVX */
+ else
return _gcry_aria_aesni_avx_ecb_crypt_blk1_16(ctx, out, in, key, nblks)
+ ASM_EXTRA_STACK;
}
@@ -478,12 +486,13 @@ static inline unsigned int
aria_avx_ctr_crypt_blk16(const ARIA_context *ctx, byte *out, const byte *in,
byte *iv)
{
+ if (0) { }
#ifdef USE_GFNI_AVX
- if (ctx->use_gfni_avx)
+ else if (ctx->use_gfni_avx)
return _gcry_aria_gfni_avx_ctr_crypt_blk16(ctx, out, in, iv)
+ ASM_EXTRA_STACK;
- else
#endif /* USE_GFNI_AVX */
+ else
return _gcry_aria_aesni_avx_ctr_crypt_blk16(ctx, out, in, iv)
+ ASM_EXTRA_STACK;
}
@@ -498,6 +507,16 @@ extern unsigned int
_gcry_aria_aesni_avx2_ctr_crypt_blk32(const void *ctx, byte *out,
const byte *in, byte *iv) ASM_FUNC_ABI;
+#ifdef USE_VAES_AVX2
+extern unsigned int
+_gcry_aria_vaes_avx2_ecb_crypt_blk32(const void *ctx, byte *out,
+ const byte *in,
+ const void *key) ASM_FUNC_ABI;
+extern unsigned int
+_gcry_aria_vaes_avx2_ctr_crypt_blk32(const void *ctx, byte *out,
+ const byte *in, byte *iv) ASM_FUNC_ABI;
+#endif /* USE_VAES_AVX2 */
+
#ifdef USE_GFNI_AVX2
extern unsigned int
_gcry_aria_gfni_avx2_ecb_crypt_blk32(const void *ctx, byte *out,
@@ -512,12 +531,18 @@ static inline unsigned int
aria_avx2_ecb_crypt_blk32(const ARIA_context *ctx, byte *out, const byte *in,
const u32 key[][ARIA_RD_KEY_WORDS])
{
+ if (0) { }
#ifdef USE_GFNI_AVX2
- if (ctx->use_gfni_avx2)
+ else if (ctx->use_gfni_avx2)
return _gcry_aria_gfni_avx2_ecb_crypt_blk32(ctx, out, in, key)
+ ASM_EXTRA_STACK;
- else
#endif /* USE_GFNI_AVX2 */
+#ifdef USE_VAES_AVX2
+ else if (ctx->use_vaes_avx2)
+ return _gcry_aria_vaes_avx2_ecb_crypt_blk32(ctx, out, in, key)
+ + ASM_EXTRA_STACK;
+#endif /* USE_VAES_AVX2 */
+ else
return _gcry_aria_aesni_avx2_ecb_crypt_blk32(ctx, out, in, key)
+ ASM_EXTRA_STACK;
}
@@ -526,12 +551,18 @@ static inline unsigned int
aria_avx2_ctr_crypt_blk32(const ARIA_context *ctx, byte *out, const byte *in,
byte *iv)
{
+ if (0) { }
#ifdef USE_GFNI_AVX2
- if (ctx->use_gfni_avx2)
+ else if (ctx->use_gfni_avx2)
return _gcry_aria_gfni_avx2_ctr_crypt_blk32(ctx, out, in, iv)
+ ASM_EXTRA_STACK;
- else
#endif /* USE_GFNI_AVX2 */
+#ifdef USE_VAES_AVX2
+ else if (ctx->use_vaes_avx2)
+ return _gcry_aria_vaes_avx2_ctr_crypt_blk32(ctx, out, in, iv)
+ + ASM_EXTRA_STACK;
+#endif /* USE_VAES_AVX2 */
+ else
return _gcry_aria_aesni_avx2_ctr_crypt_blk32(ctx, out, in, iv)
+ ASM_EXTRA_STACK;
}
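
On the C side, the 32-block dispatchers are rewritten from a two-way if/else into an "if (0) { }" chain. The point of the empty leading branch is that every accelerated backend (GFNI, and now VAES) can sit in its own #ifdef block as an "else if", and the chain still parses and falls through to the unconditional AES-NI/AVX2 call no matter which blocks the preprocessor keeps. Below is a self-contained sketch of the pattern with made-up stub names; it is not libgcrypt API, only an illustration of why the shape stays valid C.

/* Hypothetical, reduced version of aria_avx2_ecb_crypt_blk32(). */
#define USE_GFNI_AVX2 1
#define USE_VAES_AVX2 1

struct ctx { unsigned int use_gfni_avx2:1; unsigned int use_vaes_avx2:1; };

static unsigned int do_gfni_avx2 (const struct ctx *c)  { (void)c; return 1; }
static unsigned int do_vaes_avx2 (const struct ctx *c)  { (void)c; return 2; }
static unsigned int do_aesni_avx2 (const struct ctx *c) { (void)c; return 3; }

static unsigned int
dispatch_blk32 (const struct ctx *c)
{
  if (0) { }
#ifdef USE_GFNI_AVX2
  else if (c->use_gfni_avx2)
    return do_gfni_avx2 (c);   /* preferred when GFNI is present */
#endif
#ifdef USE_VAES_AVX2
  else if (c->use_vaes_avx2)
    return do_vaes_avx2 (c);   /* the path added by this commit */
#endif
  else
    return do_aesni_avx2 (c);  /* unconditional AES-NI/AVX2 fallback */
}

Removing any (or all) of the #ifdef blocks leaves a well-formed if/else chain that still ends in the default branch, which is exactly what the patch needs now that two optional backends precede it.
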
@@ -1614,6 +1645,9 @@ aria_setkey(void *c, const byte *key, unsigned keylen,
#ifdef USE_GFNI_AVX2
ctx->use_gfni_avx2 = (hwf & HWF_INTEL_GFNI) && (hwf & HWF_INTEL_AVX2);
#endif
+#ifdef USE_VAES_AVX2
+ ctx->use_vaes_avx2 = (hwf & HWF_INTEL_VAES_VPCLMUL) && (hwf & HWF_INTEL_AVX2);
+#endif
#ifdef USE_AESNI_AVX
ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX);
#endif
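
Finally, aria_setkey() only sets use_vaes_avx2 when the CPU reports both HWF_INTEL_VAES_VPCLMUL and HWF_INTEL_AVX2, because the 32-way code keeps its state in ymm registers and VAES without AVX2 would not be enough. A hedged sketch of the same gating as a standalone check is shown below; aria_vaes_usable() is a hypothetical helper, and it assumes libgcrypt's internal _gcry_get_hw_features() interface.

#include "g10lib.h"  /* assumed: declares _gcry_get_hw_features() and HWF_* */

/* Hypothetical helper mirroring the gating done in aria_setkey(). */
static int
aria_vaes_usable (void)
{
  unsigned int hwf = _gcry_get_hw_features ();

  /* Both bits are required: the VAES path operates on 256-bit ymm
     registers, so AVX2 support must be present as well. */
  return (hwf & HWF_INTEL_VAES_VPCLMUL) && (hwf & HWF_INTEL_AVX2);
}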