diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-11-19 23:26:26 +0200 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2013-11-20 18:26:58 +0200 |
commit | 5a65ffabadd50f174ab7375faad7a726cce49e61 (patch) | |
tree | 3ef317ba516e666d1c214fdf6defaa8b80f06dbf | |
parent | 0e9e7d72f3c9eb7ac832746c3034855faaf8d02c (diff) | |
download | libgcrypt-5a65ffabadd50f174ab7375faad7a726cce49e61.tar.gz |
Add Intel PCLMUL acceleration for GCM
* cipher/cipher-gcm.c (fillM): Rename...
(do_fillM): ...to this.
(ghash): Remove.
(fillM): New macro.
(GHASH): Use 'do_ghash' instead of 'ghash'.
[GCM_USE_INTEL_PCLMUL] (do_ghash_pclmul): New.
(ghash): New.
(setupM): New.
(_gcry_cipher_gcm_encrypt, _gcry_cipher_gcm_decrypt)
(_gcry_cipher_gcm_authenticate, _gcry_cipher_gcm_setiv)
(_gcry_cipher_gcm_tag): Use 'ghash' instead of 'GHASH' and
'c->u_mode.gcm.u_tag.tag' instead of 'c->u_tag.tag'.
* cipher/cipher-internal.h (GCM_USE_INTEL_PCLMUL): New.
(gcry_cipher_handle): Move 'u_tag' and 'gcm_table' under
'u_mode.gcm'.
* configure.ac (pclmulsupport, gcry_cv_gcc_inline_asm_pclmul): New.
* src/g10lib.h (HWF_INTEL_PCLMUL): New.
* src/global.c: Add "intel-pclmul".
* src/hwf-x86.c (detect_x86_gnuc): Add check for Intel PCLMUL.
--
Speed-up GCM for Intel CPUs.
Intel Haswell (x86-64):
Old:
AES GCM enc | 5.17 ns/B 184.4 MiB/s 16.55 c/B
GCM dec | 4.38 ns/B 218.0 MiB/s 14.00 c/B
GCM auth | 3.17 ns/B 300.4 MiB/s 10.16 c/B
New:
AES GCM enc | 3.01 ns/B 317.2 MiB/s 9.62 c/B
GCM dec | 1.96 ns/B 486.9 MiB/s 6.27 c/B
GCM auth | 0.848 ns/B 1124.8 MiB/s 2.71 c/B
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
-rw-r--r-- | cipher/cipher-gcm.c | 205 | ||||
-rw-r--r-- | cipher/cipher-internal.h | 55 | ||||
-rw-r--r-- | configure.ac | 35 | ||||
-rw-r--r-- | src/g10lib.h | 1 | ||||
-rw-r--r-- | src/global.c | 1 | ||||
-rw-r--r-- | src/hwf-x86.c | 5 |
6 files changed, 256 insertions, 46 deletions
diff --git a/cipher/cipher-gcm.c b/cipher/cipher-gcm.c index d7fc0d85..b93f0fa7 100644 --- a/cipher/cipher-gcm.c +++ b/cipher/cipher-gcm.c @@ -1,5 +1,6 @@ /* cipher-gcm.c - Generic Galois Counter Mode implementation * Copyright (C) 2013 Dmitry Eremin-Solenikov + * Copyright © 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> * * This file is part of Libgcrypt. * @@ -81,7 +82,7 @@ bshift (u64 * b0, u64 * b1) } static void -fillM (unsigned char *h, u64 * M) +do_fillM (unsigned char *h, u64 *M) { int i, j; @@ -179,7 +180,7 @@ bshift (u32 * M, int i) } static void -fillM (unsigned char *h, u32 * M) +do_fillM (unsigned char *h, u32 *M) { int i, j; @@ -269,15 +270,10 @@ do_ghash (unsigned char *result, const unsigned char *buf, const u32 * gcmM) buf_put_be32 (result + 8, tmp[2]); buf_put_be32 (result + 12, tmp[3]); } -#endif - -static void -ghash (unsigned char *result, const unsigned char *buf, const void *gcmM) -{ - do_ghash (result, buf, gcmM); -} +#endif /* !HAVE_U64_TYPEDEF || SIZEOF_UNSIGNED_LONG != 8 */ -#define GHASH(c, result, buf) ghash (result, buf, c->gcm_table); +#define fillM(c, h) do_fillM (h, c->u_mode.gcm.gcm_table) +#define GHASH(c, result, buf) do_ghash (result, buf, c->u_mode.gcm.gcm_table) #else @@ -296,7 +292,7 @@ bshift (unsigned long *b) } static void -ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) +do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) { unsigned long V[4]; int i, j; @@ -339,10 +335,161 @@ ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) #endif } -#define fillM(h, M) do { } while (0) +#define fillM(c, h) do { } while (0) +#define GHASH(c, result, buf) do_ghash (c->u_iv.iv, result, buf) + +#endif /* !GCM_USE_TABLES */ + + +#ifdef GCM_USE_INTEL_PCLMUL +/* + Intel PCLMUL ghash based on white paper: + "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the + GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis. 
+ */ +static void +do_ghash_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf) +{ + static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + asm volatile ("movdqu (%[result]), %%xmm1\n\t" + "movdqu %[buf], %%xmm2\n\t" + "movdqa %[hsub], %%xmm0\n\t" + "pxor %%xmm2, %%xmm1\n\t" /* big endian */ + + /* be => le */ + "pshufb %[be_mask], %%xmm1\n\t" + + /* gfmul, xmm0 has operator a and xmm1 has operator b. */ + "pshufd $78, %%xmm0, %%xmm2\n\t" + "pshufd $78, %%xmm1, %%xmm4\n\t" + "pxor %%xmm0, %%xmm2\n\t" /* xmm2 holds a0+a1 */ + "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds b0+b1 */ + + "movdqa %%xmm0, %%xmm3\n\t" + "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds a0*b0 */ + "movdqa %%xmm0, %%xmm6\n\t" + "pclmulqdq $17, %%xmm1, %%xmm6\n\t" /* xmm6 holds a1*b1 */ + "movdqa %%xmm3, %%xmm5\n\t" + "pclmulqdq $0, %%xmm2, %%xmm4\n\t" /* xmm4 holds (a0+a1)*(b0+b1) */ + + "pxor %%xmm6, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ + "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ + "movdqa %%xmm4, %%xmm5\n\t" + "psrldq $8, %%xmm4\n\t" + "pslldq $8, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm4, %%xmm6\n\t" /* <xmm6:xmm3> holds the result of the + carry-less multiplication of xmm0 + by xmm1 */ + + /* shift the result by one bit position to the left cope for + the fact that bits are reversed */ + "movdqa %%xmm3, %%xmm7\n\t" + "movdqa %%xmm6, %%xmm0\n\t" + "pslld $1, %%xmm3\n\t" + "pslld $1, %%xmm6\n\t" + "psrld $31, %%xmm7\n\t" + "psrld $31, %%xmm0\n\t" + "movdqa %%xmm7, %%xmm1\n\t" + "pslldq $4, %%xmm0\n\t" + "pslldq $4, %%xmm7\n\t" + "psrldq $12, %%xmm1\n\t" + "por %%xmm7, %%xmm3\n\t" + "por %%xmm0, %%xmm6\n\t" + "por %%xmm1, %%xmm6\n\t" + + /* first phase of the reduction */ + "movdqa %%xmm3, %%xmm7\n\t" + "movdqa %%xmm3, %%xmm0\n\t" + "pslld $31, %%xmm7\n\t" /* packed right shifting << 31 */ + "movdqa %%xmm3, %%xmm1\n\t" + "pslld $30, %%xmm0\n\t" /* packed right 
shifting shift << 30 */ + "pslld $25, %%xmm1\n\t" /* packed right shifting shift << 25 */ + "pxor %%xmm0, %%xmm7\n\t" /* xor the shifted versions */ + "pxor %%xmm1, %%xmm7\n\t" + "movdqa %%xmm7, %%xmm0\n\t" + "pslldq $12, %%xmm7\n\t" + "psrldq $4, %%xmm0\n\t" + "pxor %%xmm7, %%xmm3\n\t" /* first phase of the reduction + complete */ + + /* second phase of the reduction */ + "movdqa %%xmm3, %%xmm2\n\t" + "movdqa %%xmm3, %%xmm4\n\t" + "psrld $1, %%xmm2\n\t" /* packed left shifting >> 1 */ + "movdqa %%xmm3, %%xmm5\n\t" + "psrld $2, %%xmm4\n\t" /* packed left shifting >> 2 */ + "psrld $7, %%xmm5\n\t" /* packed left shifting >> 7 */ + "pxor %%xmm4, %%xmm2\n\t" /* xor the shifted versions */ + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm0, %%xmm2\n\t" + "pxor %%xmm2, %%xmm3\n\t" + "pxor %%xmm3, %%xmm6\n\t" /* the result is in xmm6 */ + + /* le => be */ + "pshufb %[be_mask], %%xmm6\n\t" + + "movdqu %%xmm6, (%[result])\n\t" /* store the result */ + : + : [result] "r" (result), [buf] "m" (*buf), + [hsub] "m" (*c->u_iv.iv), [be_mask] "m" (*be_mask) + : "memory" ); +} + +#endif /*GCM_USE_INTEL_PCLMUL*/ + + +static void +ghash (gcry_cipher_hd_t c, unsigned char *result, const unsigned char *buf) +{ + if (0) + ; +#ifdef GCM_USE_INTEL_PCLMUL + else if (c->u_mode.gcm.use_intel_pclmul) + { + /* TODO: Loop structure, use bit-reflection and add faster bulk + processing (parallel four blocks). */ + do_ghash_pclmul (c, result, buf); + + /* Clear used registers. 
*/ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "pxor %%xmm6, %%xmm6\n\t" + "pxor %%xmm7, %%xmm7\n\t" + ::: "cc" ); + } +#endif + else + GHASH (c, result, buf); +} + +static void +setupM (gcry_cipher_hd_t c, byte *h) +{ + if (0) + ; +#ifdef GCM_USE_INTEL_PCLMUL + else if (_gcry_get_hw_features () & HWF_INTEL_PCLMUL) + { + u64 tmp[2]; + + c->u_mode.gcm.use_intel_pclmul = 1; -#define GHASH(c, result, buf) ghash (c->u_iv.iv, result, buf); + /* Swap endianness of hsub. */ + tmp[0] = buf_get_be64(c->u_iv.iv + 8); + tmp[1] = buf_get_be64(c->u_iv.iv + 0); + buf_cpy (c->u_iv.iv, tmp, 16); + } #endif + else + fillM (c, h); +} gcry_err_code_t @@ -389,12 +536,12 @@ _gcry_cipher_gcm_encrypt (gcry_cipher_hd_t c, { buf_xor_2dst (outbuf, tmp, inbuf, n); memset (tmp + n, 0, blocksize - n); - GHASH (c, c->u_tag.tag, tmp); + ghash (c, c->u_mode.gcm.u_tag.tag, tmp); } else { buf_xor (outbuf, tmp, inbuf, n); - GHASH (c, c->u_tag.tag, outbuf); + ghash (c, c->u_mode.gcm.u_tag.tag, outbuf); } inbuflen -= n; @@ -442,11 +589,11 @@ _gcry_cipher_gcm_decrypt (gcry_cipher_hd_t c, { memcpy (tmp, inbuf, n); memset (tmp + n, 0, blocksize - n); - GHASH (c, c->u_tag.tag, tmp); + ghash (c, c->u_mode.gcm.u_tag.tag, tmp); } else { - GHASH (c, c->u_tag.tag, inbuf); + ghash (c, c->u_mode.gcm.u_tag.tag, inbuf); } i = blocksize - 1; @@ -490,7 +637,7 @@ _gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c, while (aadbuflen >= blocksize) { - GHASH (c, c->u_tag.tag, aadbuf); + ghash (c, c->u_mode.gcm.u_tag.tag, aadbuf); aadbuflen -= blocksize; aadbuf += blocksize; @@ -501,7 +648,7 @@ _gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c, memcpy (tmp, aadbuf, aadbuflen); memset (tmp + aadbuflen, 0, blocksize - aadbuflen); - GHASH (c, c->u_tag.tag, tmp); + ghash (c, c->u_mode.gcm.u_tag.tag, tmp); } return 0; @@ -512,10 +659,10 @@ _gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, const 
byte * iv, unsigned int ivlen) { memset (c->length, 0, 16); - memset (c->u_tag.tag, 0, 16); - c->spec->encrypt (&c->context.c, c->u_iv.iv, c->u_tag.tag); + memset (c->u_mode.gcm.u_tag.tag, 0, 16); + c->spec->encrypt (&c->context.c, c->u_iv.iv, c->u_mode.gcm.u_tag.tag); - fillM (c->u_iv.iv, c->gcm_table); + setupM (c, c->u_iv.iv); if (ivlen != 16 - 4) { @@ -523,12 +670,12 @@ _gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, unsigned n; memset (c->u_ctr.ctr, 0, 16); for (n = ivlen; n >= 16; n -= 16, iv += 16) - GHASH (c, c->u_ctr.ctr, iv); + ghash (c, c->u_ctr.ctr, iv); if (n != 0) { memcpy (tmp, iv, n); memset (tmp + n, 0, 16 - n); - GHASH (c, c->u_ctr.ctr, tmp); + ghash (c, c->u_ctr.ctr, tmp); } memset (tmp, 0, 16); n = 16; @@ -537,7 +684,7 @@ _gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, n--; for (; n > 0; n--, ivlen >>= 8) tmp[n - 1] = ivlen & 0xff; - GHASH (c, c->u_ctr.ctr, tmp); + ghash (c, c->u_ctr.ctr, tmp); } else { @@ -560,19 +707,19 @@ _gcry_cipher_gcm_tag (gcry_cipher_hd_t c, if (!c->marks.tag) { - GHASH (c, c->u_tag.tag, c->length); - buf_xor (c->u_tag.tag, c->lastiv, c->u_tag.tag, 16); + ghash (c, c->u_mode.gcm.u_tag.tag, c->length); + buf_xor (c->u_mode.gcm.u_tag.tag, c->lastiv, c->u_mode.gcm.u_tag.tag, 16); c->marks.tag = 1; } if (!check) { - memcpy (outbuf, c->u_tag.tag, outbuflen); + memcpy (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen); return GPG_ERR_NO_ERROR; } else { - return buf_eq_const(outbuf, c->u_tag.tag, outbuflen) ? + return buf_eq_const(outbuf, c->u_mode.gcm.u_tag.tag, outbuflen) ? GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM; } diff --git a/cipher/cipher-internal.h b/cipher/cipher-internal.h index b4d0ff9d..a6e62717 100644 --- a/cipher/cipher-internal.h +++ b/cipher/cipher-internal.h @@ -39,6 +39,18 @@ #define GCM_USE_TABLES 1 +/* GCM_USE_INTEL_PCLMUL inidicates whether to compile GCM with Intel PCLMUL + code. 
*/ +#undef GCM_USE_INTEL_PCLMUL +#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES) +# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__)) +# if __GNUC__ >= 4 +# define GCM_USE_INTEL_PCLMUL 1 +# endif +# endif +#endif /* GCM_USE_INTEL_PCLMUL */ + + /* A VIA processor with the Padlock engine as well as the Intel AES_NI instructions require an alignment of most data on a 16 byte boundary. Because we trick out the compiler while allocating the @@ -118,26 +130,10 @@ struct gcry_cipher_handle unsigned char ctr[MAX_BLOCKSIZE]; } u_ctr; - /* The interim tag for GCM mode. */ - union { - cipher_context_alignment_t iv_align; - unsigned char tag[MAX_BLOCKSIZE]; - } u_tag; - /* Space to save an IV or CTR for chaining operations. */ unsigned char lastiv[MAX_BLOCKSIZE]; int unused; /* Number of unused bytes in LASTIV. */ unsigned char length[MAX_BLOCKSIZE]; /* bit counters for GCM */ -#ifdef GCM_USE_TABLES - #if defined(HAVE_U64_TYPEDEF) && \ - (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__)) - #define GCM_TABLES_USE_U64 1 - u64 gcm_table[2 * 16]; /* pre-calculated table for GCM */ - #else - #undef GCM_TABLES_USE_U64 - u32 gcm_table[4 * 16]; /* pre-calculated table for GCM */ - #endif -#endif union { /* Mode specific storage for CCM mode. */ @@ -156,6 +152,7 @@ struct gcry_cipher_handle unsigned int lengths:1; /* Set to 1 if CCM length parameters has been processed. */ } ccm; + /* Mode specific storage for CMAC mode. */ struct { unsigned int tag:1; /* Set to 1 if tag has been finalized. */ @@ -163,8 +160,32 @@ struct gcry_cipher_handle /* Subkeys for tag creation, not cleared by gcry_cipher_reset. */ unsigned char subkeys[2][MAX_BLOCKSIZE]; } cmac; - } u_mode; + /* Mode specific storage for GCM mode. */ + struct { + /* The interim tag for GCM mode. */ + union { + cipher_context_alignment_t iv_align; + unsigned char tag[MAX_BLOCKSIZE]; + } u_tag; + + /* Pre-calculated table for GCM. 
*/ +#ifdef GCM_USE_TABLES + #if defined(HAVE_U64_TYPEDEF) && (SIZEOF_UNSIGNED_LONG == 8 \ + || defined(__x86_64__)) + #define GCM_TABLES_USE_U64 1 + u64 gcm_table[2 * 16]; + #else + #undef GCM_TABLES_USE_U64 + u32 gcm_table[4 * 16]; + #endif +#endif + +#ifdef GCM_USE_INTEL_PCLMUL + unsigned int use_intel_pclmul:1; +#endif + } gcm; + } u_mode; /* What follows are two contexts of the cipher in use. The first one needs to be aligned well enough for the cipher operation diff --git a/configure.ac b/configure.ac index c4f8776b..6d403436 100644 --- a/configure.ac +++ b/configure.ac @@ -567,6 +567,14 @@ AC_ARG_ENABLE(aesni-support, aesnisupport=$enableval,aesnisupport=yes) AC_MSG_RESULT($aesnisupport) +# Implementation of the --disable-pclmul-support switch. +AC_MSG_CHECKING([whether PCLMUL support is requested]) +AC_ARG_ENABLE(pclmul-support, + AC_HELP_STRING([--disable-pclmul-support], + [Disable support for the Intel PCLMUL instructions]), + pclmulsupport=$enableval,pclmulsupport=yes) +AC_MSG_RESULT($pclmulsupport) + # Implementation of the --disable-drng-support switch. AC_MSG_CHECKING([whether DRNG support is requested]) AC_ARG_ENABLE(drng-support, @@ -991,6 +999,23 @@ fi # +# Check whether GCC inline assembler supports PCLMUL instructions. 
+# +AC_CACHE_CHECK([whether GCC inline assembler supports PCLMUL instructions], + [gcry_cv_gcc_inline_asm_pclmul], + [gcry_cv_gcc_inline_asm_pclmul=no + AC_COMPILE_IFELSE([AC_LANG_SOURCE( + [[void a(void) { + __asm__("pclmulqdq \$0, %%xmm1, %%xmm3\n\t":::"cc"); + }]])], + [gcry_cv_gcc_inline_asm_pclmul=yes])]) +if test "$gcry_cv_gcc_inline_asm_pclmul" = "yes" ; then + AC_DEFINE(HAVE_GCC_INLINE_ASM_PCLMUL,1, + [Defined if inline assembler supports PCLMUL instructions]) +fi + + +# # Check whether GCC inline assembler supports AVX instructions # AC_CACHE_CHECK([whether GCC inline assembler supports AVX instructions], @@ -1369,6 +1394,11 @@ if test x"$aesnisupport" = xyes ; then aesnisupport="no (unsupported by compiler)" fi fi +if test x"$pclmulsupport" = xyes ; then + if test "$gcry_cv_gcc_inline_asm_pclmul" != "yes" ; then + pclmulsupport="no (unsupported by compiler)" + fi +fi if test x"$avxsupport" = xyes ; then if test "$gcry_cv_gcc_inline_asm_avx" != "yes" ; then avxsupport="no (unsupported by compiler)" @@ -1389,6 +1419,10 @@ if test x"$aesnisupport" = xyes ; then AC_DEFINE(ENABLE_AESNI_SUPPORT, 1, [Enable support for Intel AES-NI instructions.]) fi +if test x"$pclmulsupport" = xyes ; then + AC_DEFINE(ENABLE_PCLMUL_SUPPORT, 1, + [Enable support for Intel PCLMUL instructions.]) +fi if test x"$avxsupport" = xyes ; then AC_DEFINE(ENABLE_AVX_SUPPORT,1, [Enable support for Intel AVX instructions.]) @@ -1826,6 +1860,7 @@ GCRY_MSG_SHOW([Random number generator: ],[$random]) GCRY_MSG_SHOW([Using linux capabilities: ],[$use_capabilities]) GCRY_MSG_SHOW([Try using Padlock crypto: ],[$padlocksupport]) GCRY_MSG_SHOW([Try using AES-NI crypto: ],[$aesnisupport]) +GCRY_MSG_SHOW([Try using Intel PCLMUL: ],[$pclmulsupport]) GCRY_MSG_SHOW([Try using DRNG (RDRAND): ],[$drngsupport]) GCRY_MSG_SHOW([Try using Intel AVX: ],[$avxsupport]) GCRY_MSG_SHOW([Try using Intel AVX2: ],[$avx2support]) diff --git a/src/g10lib.h b/src/g10lib.h index ae4502cd..0612cbc7 100644 --- a/src/g10lib.h 
+++ b/src/g10lib.h @@ -167,6 +167,7 @@ int _gcry_log_verbosity( int level ); #define HWF_PADLOCK_SHA 4 #define HWF_PADLOCK_MMUL 8 +#define HWF_INTEL_PCLMUL 128 #define HWF_INTEL_AESNI 256 #define HWF_INTEL_RDRAND 512 #define HWF_INTEL_AVX 1024 diff --git a/src/global.c b/src/global.c index 44667cfb..841f1889 100644 --- a/src/global.c +++ b/src/global.c @@ -66,6 +66,7 @@ static struct { HWF_PADLOCK_AES, "padlock-aes" }, { HWF_PADLOCK_SHA, "padlock-sha" }, { HWF_PADLOCK_MMUL,"padlock-mmul"}, + { HWF_INTEL_PCLMUL,"intel-pclmul" }, { HWF_INTEL_AESNI, "intel-aesni" }, { HWF_INTEL_RDRAND,"intel-rdrand" }, { HWF_INTEL_AVX, "intel-avx" }, diff --git a/src/hwf-x86.c b/src/hwf-x86.c index 2ceb04c8..784fe2a4 100644 --- a/src/hwf-x86.c +++ b/src/hwf-x86.c @@ -201,6 +201,11 @@ detect_x86_gnuc (void) /* Get CPU info and Intel feature flags (ECX). */ get_cpuid(1, NULL, NULL, &features, NULL); +#ifdef ENABLE_PCLMUL_SUPPORT + /* Test bit 1 for PCLMUL. */ + if (features & 0x00000002) + result |= HWF_INTEL_PCLMUL; +#endif #ifdef ENABLE_AESNI_SUPPORT /* Test bit 25 for AES-NI. */ if (features & 0x02000000) |