author    Jussi Kivilinna <jussi.kivilinna@iki.fi>   2013-11-19 23:26:26 +0200
committer Jussi Kivilinna <jussi.kivilinna@iki.fi>   2013-11-20 18:26:58 +0200
commit    5a65ffabadd50f174ab7375faad7a726cce49e61 (patch)
tree      3ef317ba516e666d1c214fdf6defaa8b80f06dbf
parent    0e9e7d72f3c9eb7ac832746c3034855faaf8d02c (diff)
download  libgcrypt-5a65ffabadd50f174ab7375faad7a726cce49e61.tar.gz
Add Intel PCLMUL acceleration for GCM
* cipher/cipher-gcm.c (fillM): Rename...
(do_fillM): ...to this.
(ghash): Remove.
(fillM): New macro.
(GHASH): Use 'do_ghash' instead of 'ghash'.
[GCM_USE_INTEL_PCLMUL] (do_ghash_pclmul): New.
(ghash): New.
(setupM): New.
(_gcry_cipher_gcm_encrypt, _gcry_cipher_gcm_decrypt)
(_gcry_cipher_gcm_authenticate, _gcry_cipher_gcm_setiv)
(_gcry_cipher_gcm_tag): Use 'ghash' instead of 'GHASH' and
'c->u_mode.gcm.u_tag.tag' instead of 'c->u_tag.tag'.
* cipher/cipher-internal.h (GCM_USE_INTEL_PCLMUL): New.
(gcry_cipher_handle): Move 'u_tag' and 'gcm_table' under 'u_mode.gcm'.
* configure.ac (pclmulsupport, gcry_cv_gcc_inline_asm_pclmul): New.
* src/g10lib.h (HWF_INTEL_PCLMUL): New.
* src/global.c: Add "intel-pclmul".
* src/hwf-x86.c (detect_x86_gnuc): Add check for Intel PCLMUL.
--

Speed-up GCM for Intel CPUs.

Intel Haswell (x86-64):

Old:
 AES   GCM enc |   5.17 ns/B     184.4 MiB/s     16.55 c/B
       GCM dec |   4.38 ns/B     218.0 MiB/s     14.00 c/B
      GCM auth |   3.17 ns/B     300.4 MiB/s     10.16 c/B
New:
 AES   GCM enc |   3.01 ns/B     317.2 MiB/s      9.62 c/B
       GCM dec |   1.96 ns/B     486.9 MiB/s      6.27 c/B
      GCM auth |  0.848 ns/B    1124.8 MiB/s      2.71 c/B

Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
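Note on the numbers: the c/B and ns/B columns are consistent with a Haswell core running at roughly 3.2 GHz (c/B = ns/B x clock in GHz; e.g. 3.01 ns/B x 3.2 GHz = 9.6 c/B), so GHASH-only authentication comes out roughly 3.7x faster. The accelerated path is selected automatically at run time; callers keep using the ordinary GCM interface. A minimal sketch of such a caller, with made-up key/IV/message placeholders and error checking omitted:

#include <gcrypt.h>

int
main (void)
{
  gcry_cipher_hd_t hd;
  unsigned char key[16] = { 0 };    /* placeholder key */
  unsigned char iv[12]  = { 0 };    /* 96-bit IV, the common GCM case */
  unsigned char buf[64] = { 0 };    /* message, encrypted in place */
  unsigned char tag[16];

  gcry_check_version (NULL);

  gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_GCM, 0);
  gcry_cipher_setkey (hd, key, sizeof key);
  gcry_cipher_setiv (hd, iv, sizeof iv);
  gcry_cipher_encrypt (hd, buf, sizeof buf, NULL, 0);  /* in-place */
  gcry_cipher_gettag (hd, tag, sizeof tag);
  gcry_cipher_close (hd);
  return 0;
}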
-rw-r--r--  cipher/cipher-gcm.c       | 205
-rw-r--r--  cipher/cipher-internal.h  |  55
-rw-r--r--  configure.ac              |  35
-rw-r--r--  src/g10lib.h              |   1
-rw-r--r--  src/global.c              |   1
-rw-r--r--  src/hwf-x86.c             |   5
6 files changed, 256 insertions, 46 deletions
diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c
index d7fc0d85..b93f0fa7 100644
--- a/cipher/cipher-gcm.c
+++ b/cipher/cipher-gcm.c
@@ -1,5 +1,6 @@
/* cipher-gcm.c - Generic Galois Counter Mode implementation
* Copyright (C) 2013 Dmitry Eremin-Solenikov
+ * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is part of Libgcrypt.
*
@@ -81,7 +82,7 @@ bshift (u64 * b0, u64 * b1)
}
static void
-fillM (unsigned char *h, u64 * M)
+do_fillM (unsigned char *h, u64 *M)
{
int i, j;
@@ -179,7 +180,7 @@ bshift (u32 * M, int i)
}
static void
-fillM (unsigned char *h, u32 * M)
+do_fillM (unsigned char *h, u32 *M)
{
int i, j;
@@ -269,15 +270,10 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u32 * gcmM)
buf_put_be32 (result + 8, tmp[2]);
buf_put_be32 (result + 12, tmp[3]);
}
-#endif
-
-static void
-ghash (unsigned char *result, const unsigned char *buf, const void *gcmM)
-{
- do_ghash (result, buf, gcmM);
-}
+#endif /* !HAVE_U64_TYPEDEF || SIZEOF_UNSIGNED_LONG != 8 */
-#define GHASH(c, result, buf) ghash (result, buf, c->gcm_table);
+#define fillM(c, h) do_fillM (h, c->u_mode.gcm.gcm_table)
+#define GHASH(c, result, buf) do_ghash (result, buf, c->u_mode.gcm.gcm_table)
#else
@@ -296,7 +292,7 @@ bshift (unsigned long *b)
}
static void
-ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf)
+do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf)
{
unsigned long V[4];
int i, j;
@@ -339,10 +335,161 @@ ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf)
#endif
}
-#define fillM(h, M) do { } while (0)
+#define fillM(c, h) do { } while (0)
+#define GHASH(c, result, buf) do_ghash (c->u_iv.iv, result, buf)
+
+#endif /* !GCM_USE_TABLES */
+
+
+#ifdef GCM_USE_INTEL_PCLMUL
+/*
+ Intel PCLMUL ghash based on white paper:
+ "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the
+ GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis.
+ */
+static void
+do_ghash_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf)
+{
+ static const unsigned char be_mask[16] __attribute__ ((aligned (16))) =
+ { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 };
+
+ asm volatile ("movdqu (%[result]), %%xmm1\n\t"
+ "movdqu %[buf], %%xmm2\n\t"
+ "movdqa %[hsub], %%xmm0\n\t"
+ "pxor %%xmm2, %%xmm1\n\t" /* big endian */
+
+ /* be => le */
+ "pshufb %[be_mask], %%xmm1\n\t"
+
+ /* gfmul, xmm0 has operand a and xmm1 has operand b. */
+ "pshufd $78, %%xmm0, %%xmm2\n\t"
+ "pshufd $78, %%xmm1, %%xmm4\n\t"
+ "pxor %%xmm0, %%xmm2\n\t" /* xmm2 holds a0+a1 */
+ "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds b0+b1 */
+
+ "movdqa %%xmm0, %%xmm3\n\t"
+ "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds a0*b0 */
+ "movdqa %%xmm0, %%xmm6\n\t"
+ "pclmulqdq $17, %%xmm1, %%xmm6\n\t" /* xmm6 holds a1*b1 */
+ "movdqa %%xmm3, %%xmm5\n\t"
+ "pclmulqdq $0, %%xmm2, %%xmm4\n\t" /* xmm4 holds (a0+a1)*(b0+b1) */
+
+ "pxor %%xmm6, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */
+ "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */
+ "movdqa %%xmm4, %%xmm5\n\t"
+ "psrldq $8, %%xmm4\n\t"
+ "pslldq $8, %%xmm5\n\t"
+ "pxor %%xmm5, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm6\n\t" /* <xmm6:xmm3> holds the result of the
+ carry-less multiplication of xmm0
+ by xmm1 */
+
+ /* shift the result by one bit position to the left to cope with
+ the fact that the bits are reversed */
+ "movdqa %%xmm3, %%xmm7\n\t"
+ "movdqa %%xmm6, %%xmm0\n\t"
+ "pslld $1, %%xmm3\n\t"
+ "pslld $1, %%xmm6\n\t"
+ "psrld $31, %%xmm7\n\t"
+ "psrld $31, %%xmm0\n\t"
+ "movdqa %%xmm7, %%xmm1\n\t"
+ "pslldq $4, %%xmm0\n\t"
+ "pslldq $4, %%xmm7\n\t"
+ "psrldq $12, %%xmm1\n\t"
+ "por %%xmm7, %%xmm3\n\t"
+ "por %%xmm0, %%xmm6\n\t"
+ "por %%xmm1, %%xmm6\n\t"
+
+ /* first phase of the reduction */
+ "movdqa %%xmm3, %%xmm7\n\t"
+ "movdqa %%xmm3, %%xmm0\n\t"
+ "pslld $31, %%xmm7\n\t" /* packed right shifting << 31 */
+ "movdqa %%xmm3, %%xmm1\n\t"
+ "pslld $30, %%xmm0\n\t" /* packed right shifting shift << 30 */
+ "pslld $25, %%xmm1\n\t" /* packed right shifting shift << 25 */
+ "pxor %%xmm0, %%xmm7\n\t" /* xor the shifted versions */
+ "pxor %%xmm1, %%xmm7\n\t"
+ "movdqa %%xmm7, %%xmm0\n\t"
+ "pslldq $12, %%xmm7\n\t"
+ "psrldq $4, %%xmm0\n\t"
+ "pxor %%xmm7, %%xmm3\n\t" /* first phase of the reduction
+ complete */
+
+ /* second phase of the reduction */
+ "movdqa %%xmm3, %%xmm2\n\t"
+ "movdqa %%xmm3, %%xmm4\n\t"
+ "psrld $1, %%xmm2\n\t" /* packed left shifting >> 1 */
+ "movdqa %%xmm3, %%xmm5\n\t"
+ "psrld $2, %%xmm4\n\t" /* packed left shifting >> 2 */
+ "psrld $7, %%xmm5\n\t" /* packed left shifting >> 7 */
+ "pxor %%xmm4, %%xmm2\n\t" /* xor the shifted versions */
+ "pxor %%xmm5, %%xmm2\n\t"
+ "pxor %%xmm0, %%xmm2\n\t"
+ "pxor %%xmm2, %%xmm3\n\t"
+ "pxor %%xmm3, %%xmm6\n\t" /* the result is in xmm6 */
+
+ /* le => be */
+ "pshufb %[be_mask], %%xmm6\n\t"
+
+ "movdqu %%xmm6, (%[result])\n\t" /* store the result */
+ :
+ : [result] "r" (result), [buf] "m" (*buf),
+ [hsub] "m" (*c->u_iv.iv), [be_mask] "m" (*be_mask)
+ : "memory" );
+}
+
+#endif /*GCM_USE_INTEL_PCLMUL*/
+
+
+static void
+ghash (gcry_cipher_hd_t c, unsigned char *result, const unsigned char *buf)
+{
+ if (0)
+ ;
+#ifdef GCM_USE_INTEL_PCLMUL
+ else if (c->u_mode.gcm.use_intel_pclmul)
+ {
+ /* TODO: Loop structure, use bit-reflection and add faster bulk
+ processing (parallel four blocks). */
+ do_ghash_pclmul (c, result, buf);
+
+ /* Clear used registers. */
+ asm volatile( "pxor %%xmm0, %%xmm0\n\t"
+ "pxor %%xmm1, %%xmm1\n\t"
+ "pxor %%xmm2, %%xmm2\n\t"
+ "pxor %%xmm3, %%xmm3\n\t"
+ "pxor %%xmm4, %%xmm4\n\t"
+ "pxor %%xmm5, %%xmm5\n\t"
+ "pxor %%xmm6, %%xmm6\n\t"
+ "pxor %%xmm7, %%xmm7\n\t"
+ ::: "cc" );
+ }
+#endif
+ else
+ GHASH (c, result, buf);
+}
+
+static void
+setupM (gcry_cipher_hd_t c, byte *h)
+{
+ if (0)
+ ;
+#ifdef GCM_USE_INTEL_PCLMUL
+ else if (_gcry_get_hw_features () & HWF_INTEL_PCLMUL)
+ {
+ u64 tmp[2];
+
+ c->u_mode.gcm.use_intel_pclmul = 1;
-#define GHASH(c, result, buf) ghash (c->u_iv.iv, result, buf);
+ /* Swap endianness of hsub. */
+ tmp[0] = buf_get_be64(c->u_iv.iv + 8);
+ tmp[1] = buf_get_be64(c->u_iv.iv + 0);
+ buf_cpy (c->u_iv.iv, tmp, 16);
+ }
#endif
+ else
+ fillM (c, h);
+}
gcry_err_code_t
@@ -389,12 +536,12 @@ _gcry_cipher_gcm_encrypt (gcry_cipher_hd_t c,
{
buf_xor_2dst (outbuf, tmp, inbuf, n);
memset (tmp + n, 0, blocksize - n);
- GHASH (c, c->u_tag.tag, tmp);
+ ghash (c, c->u_mode.gcm.u_tag.tag, tmp);
}
else
{
buf_xor (outbuf, tmp, inbuf, n);
- GHASH (c, c->u_tag.tag, outbuf);
+ ghash (c, c->u_mode.gcm.u_tag.tag, outbuf);
}
inbuflen -= n;
@@ -442,11 +589,11 @@ _gcry_cipher_gcm_decrypt (gcry_cipher_hd_t c,
{
memcpy (tmp, inbuf, n);
memset (tmp + n, 0, blocksize - n);
- GHASH (c, c->u_tag.tag, tmp);
+ ghash (c, c->u_mode.gcm.u_tag.tag, tmp);
}
else
{
- GHASH (c, c->u_tag.tag, inbuf);
+ ghash (c, c->u_mode.gcm.u_tag.tag, inbuf);
}
i = blocksize - 1;
@@ -490,7 +637,7 @@ _gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c,
while (aadbuflen >= blocksize)
{
- GHASH (c, c->u_tag.tag, aadbuf);
+ ghash (c, c->u_mode.gcm.u_tag.tag, aadbuf);
aadbuflen -= blocksize;
aadbuf += blocksize;
@@ -501,7 +648,7 @@ _gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c,
memcpy (tmp, aadbuf, aadbuflen);
memset (tmp + aadbuflen, 0, blocksize - aadbuflen);
- GHASH (c, c->u_tag.tag, tmp);
+ ghash (c, c->u_mode.gcm.u_tag.tag, tmp);
}
return 0;
@@ -512,10 +659,10 @@ _gcry_cipher_gcm_setiv (gcry_cipher_hd_t c,
const byte * iv, unsigned int ivlen)
{
memset (c->length, 0, 16);
- memset (c->u_tag.tag, 0, 16);
- c->spec->encrypt (&c->context.c, c->u_iv.iv, c->u_tag.tag);
+ memset (c->u_mode.gcm.u_tag.tag, 0, 16);
+ c->spec->encrypt (&c->context.c, c->u_iv.iv, c->u_mode.gcm.u_tag.tag);
- fillM (c->u_iv.iv, c->gcm_table);
+ setupM (c, c->u_iv.iv);
if (ivlen != 16 - 4)
{
@@ -523,12 +670,12 @@ _gcry_cipher_gcm_setiv (gcry_cipher_hd_t c,
unsigned n;
memset (c->u_ctr.ctr, 0, 16);
for (n = ivlen; n >= 16; n -= 16, iv += 16)
- GHASH (c, c->u_ctr.ctr, iv);
+ ghash (c, c->u_ctr.ctr, iv);
if (n != 0)
{
memcpy (tmp, iv, n);
memset (tmp + n, 0, 16 - n);
- GHASH (c, c->u_ctr.ctr, tmp);
+ ghash (c, c->u_ctr.ctr, tmp);
}
memset (tmp, 0, 16);
n = 16;
@@ -537,7 +684,7 @@ _gcry_cipher_gcm_setiv (gcry_cipher_hd_t c,
n--;
for (; n > 0; n--, ivlen >>= 8)
tmp[n - 1] = ivlen & 0xff;
- GHASH (c, c->u_ctr.ctr, tmp);
+ ghash (c, c->u_ctr.ctr, tmp);
}
else
{
@@ -560,19 +707,19 @@ _gcry_cipher_gcm_tag (gcry_cipher_hd_t c,
if (!c->marks.tag)
{
- GHASH (c, c->u_tag.tag, c->length);
- buf_xor (c->u_tag.tag, c->lastiv, c->u_tag.tag, 16);
+ ghash (c, c->u_mode.gcm.u_tag.tag, c->length);
+ buf_xor (c->u_mode.gcm.u_tag.tag, c->lastiv, c->u_mode.gcm.u_tag.tag, 16);
c->marks.tag = 1;
}
if (!check)
{
- memcpy (outbuf, c->u_tag.tag, outbuflen);
+ memcpy (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen);
return GPG_ERR_NO_ERROR;
}
else
{
- return buf_eq_const(outbuf, c->u_tag.tag, outbuflen) ?
+ return buf_eq_const(outbuf, c->u_mode.gcm.u_tag.tag, outbuflen) ?
GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM;
}
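A note on the arithmetic in do_ghash_pclmul above: each pclmulqdq computes one 64x64 -> 128-bit carry-less product (multiplication of polynomials over GF(2)), and the three products are combined with the Karatsuba identity a*b = a1*b1*x^128 + ((a0+a1)*(b0+b1) + a0*b0 + a1*b1)*x^64 + a0*b0 (where '+' is XOR), which is exactly what the xmm3/xmm6/xmm4 sequence assembles before <xmm6:xmm3> is reduced modulo the GCM polynomial x^128 + x^7 + x^2 + x + 1. A rough portable-C illustration of what a single pclmulqdq computes (purely illustrative, not part of the patch):

#include <stdint.h>

/* Carry-less multiply of two 64-bit polynomials over GF(2); the
   128-bit product is returned in *hi:*lo.  This mirrors what one
   pclmulqdq instruction does, just without the hardware speed. */
static void
clmul_64x64 (uint64_t a, uint64_t b, uint64_t *hi, uint64_t *lo)
{
  uint64_t h = 0, l = 0;
  int i;

  for (i = 0; i < 64; i++)
    if ((b >> i) & 1)
      {
        l ^= a << i;              /* low 64 bits of a * x^i */
        if (i)
          h ^= a >> (64 - i);     /* bits carried past bit 63 */
      }

  *hi = h;
  *lo = l;
}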
diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h
index b4d0ff9d..a6e62717 100644
--- a/cipher/cipher-internal.h
+++ b/cipher/cipher-internal.h
@@ -39,6 +39,18 @@
#define GCM_USE_TABLES 1
+/* GCM_USE_INTEL_PCLMUL indicates whether to compile GCM with Intel PCLMUL
+   code. */
+#undef GCM_USE_INTEL_PCLMUL
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+# if __GNUC__ >= 4
+# define GCM_USE_INTEL_PCLMUL 1
+# endif
+# endif
+#endif /* GCM_USE_INTEL_PCLMUL */
+
+
/* A VIA processor with the Padlock engine as well as the Intel AES_NI
instructions require an alignment of most data on a 16 byte
boundary. Because we trick out the compiler while allocating the
@@ -118,26 +130,10 @@ struct gcry_cipher_handle
unsigned char ctr[MAX_BLOCKSIZE];
} u_ctr;
- /* The interim tag for GCM mode. */
- union {
- cipher_context_alignment_t iv_align;
- unsigned char tag[MAX_BLOCKSIZE];
- } u_tag;
-
/* Space to save an IV or CTR for chaining operations. */
unsigned char lastiv[MAX_BLOCKSIZE];
int unused; /* Number of unused bytes in LASTIV. */
unsigned char length[MAX_BLOCKSIZE]; /* bit counters for GCM */
-#ifdef GCM_USE_TABLES
- #if defined(HAVE_U64_TYPEDEF) && \
- (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__))
- #define GCM_TABLES_USE_U64 1
- u64 gcm_table[2 * 16]; /* pre-calculated table for GCM */
- #else
- #undef GCM_TABLES_USE_U64
- u32 gcm_table[4 * 16]; /* pre-calculated table for GCM */
- #endif
-#endif
union {
/* Mode specific storage for CCM mode. */
@@ -156,6 +152,7 @@ struct gcry_cipher_handle
unsigned int lengths:1; /* Set to 1 if CCM length parameters has been
processed. */
} ccm;
+
/* Mode specific storage for CMAC mode. */
struct {
unsigned int tag:1; /* Set to 1 if tag has been finalized. */
@@ -163,8 +160,32 @@ struct gcry_cipher_handle
/* Subkeys for tag creation, not cleared by gcry_cipher_reset. */
unsigned char subkeys[2][MAX_BLOCKSIZE];
} cmac;
- } u_mode;
+ /* Mode specific storage for GCM mode. */
+ struct {
+ /* The interim tag for GCM mode. */
+ union {
+ cipher_context_alignment_t iv_align;
+ unsigned char tag[MAX_BLOCKSIZE];
+ } u_tag;
+
+ /* Pre-calculated table for GCM. */
+#ifdef GCM_USE_TABLES
+ #if defined(HAVE_U64_TYPEDEF) && (SIZEOF_UNSIGNED_LONG == 8 \
+ || defined(__x86_64__))
+ #define GCM_TABLES_USE_U64 1
+ u64 gcm_table[2 * 16];
+ #else
+ #undef GCM_TABLES_USE_U64
+ u32 gcm_table[4 * 16];
+ #endif
+#endif
+
+#ifdef GCM_USE_INTEL_PCLMUL
+ unsigned int use_intel_pclmul:1;
+#endif
+ } gcm;
+ } u_mode;
/* What follows are two contexts of the cipher in use. The first
one needs to be aligned well enough for the cipher operation
diff --git a/configure.ac b/configure.ac
index c4f8776b..6d403436 100644
--- a/configure.ac
+++ b/configure.ac
@@ -567,6 +567,14 @@ AC_ARG_ENABLE(aesni-support,
aesnisupport=$enableval,aesnisupport=yes)
AC_MSG_RESULT($aesnisupport)
+# Implementation of the --disable-pclmul-support switch.
+AC_MSG_CHECKING([whether PCLMUL support is requested])
+AC_ARG_ENABLE(pclmul-support,
+ AC_HELP_STRING([--disable-pclmul-support],
+ [Disable support for the Intel PCLMUL instructions]),
+ pclmulsupport=$enableval,pclmulsupport=yes)
+AC_MSG_RESULT($pclmulsupport)
+
# Implementation of the --disable-drng-support switch.
AC_MSG_CHECKING([whether DRNG support is requested])
AC_ARG_ENABLE(drng-support,
@@ -991,6 +999,23 @@ fi
#
+# Check whether GCC inline assembler supports PCLMUL instructions.
+#
+AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions],
+ [gcry_cv_gcc_inline_asm_pclmul],
+ [gcry_cv_gcc_inline_asm_pclmul=no
+ AC_COMPILE_IFELSE([AC_LANG_SOURCE(
+ [[void a(void) {
+ __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc");
+ }]])],
+ [gcry_cv_gcc_inline_asm_pclmul=yes])])
+if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then
+ AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1,
+ [Defined if inline assembler supports PCLMUL instructions])
+fi
+
+
+#
# Check whether GCC inline assembler supports AVX instructions
#
AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions],
@@ -1369,6 +1394,11 @@ if test x"$aesnisupport" = xyes ; then
aesnisupport="no (unsupported by compiler)"
fi
fi
+if test x"$pclmulsupport" = xyes ; then
+ if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then
+ pclmulsupport="no (unsupported by compiler)"
+ fi
+fi
if test x"$avxsupport" = xyes ; then
if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then
avxsupport="no (unsupported by compiler)"
@@ -1389,6 +1419,10 @@ if test x"$aesnisupport" = xyes ; then
AC_DEFINE(ENABLE_AESNI_SUPPORT, 1,
[Enable support for Intel AES-NI instructions.])
fi
+if test x"$pclmulsupport" = xyes ; then
+ AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1,
+ [Enable support for Intel PCLMUL instructions.])
+fi
if test x"$avxsupport" = xyes ; then
AC_DEFINE(ENABLE_AVX_SUPPORT,1,
[Enable support for Intel AVX instructions.])
@@ -1826,6 +1860,7 @@ GCRY_MSG_SHOW([Random number generator: ],[$random])
GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities])
GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport])
GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport])
+GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport])
GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport])
GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport])
GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support])
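The three configure.ac hunks follow the pattern of the existing AES-NI switch: PCLMUL use is on by default, a user can pass --disable-pclmul-support to turn it off, the new gcry_cv_gcc_inline_asm_pclmul check downgrades the summary to "no (unsupported by compiler)" when the assembler cannot emit the instruction, and only when both steps pass is ENABLE_PCLMUL_SUPPORT defined for the C code.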
diff --git a/src/g10lib.h b/src/g10lib.h
index ae4502cd..0612cbc7 100644
--- a/src/g10lib.h
+++ b/src/g10lib.h
@@ -167,6 +167,7 @@ int _gcry_log_verbosity( int level );
#define HWF_PADLOCK_SHA 4
#define HWF_PADLOCK_MMUL 8
+#define HWF_INTEL_PCLMUL 128
#define HWF_INTEL_AESNI 256
#define HWF_INTEL_RDRAND 512
#define HWF_INTEL_AVX 1024
diff --git a/src/global.c b/src/global.c
index 44667cfb..841f1889 100644
--- a/src/global.c
+++ b/src/global.c
@@ -66,6 +66,7 @@ static struct
{ HWF_PADLOCK_AES, "padlock-aes" },
{ HWF_PADLOCK_SHA, "padlock-sha" },
{ HWF_PADLOCK_MMUL,"padlock-mmul"},
+ { HWF_INTEL_PCLMUL,"intel-pclmul" },
{ HWF_INTEL_AESNI, "intel-aesni" },
{ HWF_INTEL_RDRAND,"intel-rdrand" },
{ HWF_INTEL_AVX, "intel-avx" },
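With the table entry above, the new flag becomes visible in libgcrypt's configuration output, so it is easy to confirm at run time whether the PCLMUL path was detected. A small check of my own (not part of the patch, and it assumes this hwflist table is what the config printer walks) could look like:

#include <stdio.h>
#include <gcrypt.h>

int
main (void)
{
  gcry_check_version (NULL);
  /* On capable CPUs, "intel-pclmul" should appear among the listed
     hardware features after this change. */
  gcry_control (GCRYCTL_PRINT_CONFIG, stdout);
  return 0;
}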
diff --git a/src/hwf-x86.c b/src/hwf-x86.c
index 2ceb04c8..784fe2a4 100644
--- a/src/hwf-x86.c
+++ b/src/hwf-x86.c
@@ -201,6 +201,11 @@ detect_x86_gnuc (void)
/* Get CPU info and Intel feature flags (ECX). */
get_cpuid(1, NULL, NULL, &features, NULL);
+#ifdef ENABLE_PCLMUL_SUPPORT
+ /* Test bit 1 for PCLMUL. */
+ if (features & 0x00000002)
+ result |= HWF_INTEL_PCLMUL;
+#endif
#ifdef ENABLE_AESNI_SUPPORT
/* Test bit 25 for AES-NI. */
if (features & 0x02000000)
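For completeness, the bit tested above is CPUID leaf 1, ECX bit 1, which is the PCLMULQDQ feature flag, matching the style of the AES-NI check at bit 25 next to it. A stand-alone illustration of the same probe using GCC's <cpuid.h> (a sketch only; the patch itself uses libgcrypt's get_cpuid wrapper):

#include <stdio.h>
#include <cpuid.h>

int
main (void)
{
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1: feature flags; ECX bit 1 is PCLMULQDQ. */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) && (ecx & (1 << 1)))
    puts ("PCLMULQDQ available");
  else
    puts ("PCLMULQDQ not available");
  return 0;
}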