diff options
author | JonasZhou <JonasZhou@zhaoxin.com> | 2020-09-15 16:36:57 +0800 |
---|---|---|
committer | JonasZhou <JonasZhou@zhaoxin.com> | 2020-09-24 10:06:03 +0800 |
commit | 2c5ca0a8c771ed952c432dd5ba271719896d0d54 (patch) | |
tree | b4b3a63fc54cc3f76fe7f4935aae1093040a03f2 | |
parent | 7202e921db9da6faddcab8d2ac872508ee2281be (diff) | |
download | gnutls-2c5ca0a8c771ed952c432dd5ba271719896d0d54.tar.gz |
x86: add detection of instruction set on Zhaoxin CPU
Add detection of extended instruction sets on Zhaoxin CPUs, e.g. SSSE3,
SHA, etc. Set the priority of the algorithms according to the benchmark
test results on Zhaoxin CPUs.
Signed-off-by: JonasZhou <JonasZhou@zhaoxin.com>
-rw-r--r-- | lib/accelerated/x86/hmac-padlock.c | 2 | ||||
-rw-r--r-- | lib/accelerated/x86/sha-padlock.c | 2 | ||||
-rw-r--r-- | lib/accelerated/x86/sha-padlock.h | 4 | ||||
-rw-r--r-- | lib/accelerated/x86/x86-common.c | 406 |
4 files changed, 361 insertions, 53 deletions
diff --git a/lib/accelerated/x86/hmac-padlock.c b/lib/accelerated/x86/hmac-padlock.c index be6c55bc33..fd81f5c5fd 100644 --- a/lib/accelerated/x86/hmac-padlock.c +++ b/lib/accelerated/x86/hmac-padlock.c @@ -357,7 +357,7 @@ const gnutls_crypto_mac_st _gnutls_hmac_sha_padlock = { .fast = wrap_padlock_hmac_fast }; -const gnutls_crypto_mac_st _gnutls_hmac_sha_padlock_nano = { +const gnutls_crypto_mac_st _gnutls_hmac_sha_padlock_enhance = { .init = wrap_padlock_hmac_init, .setkey = wrap_padlock_hmac_setkey, .setnonce = NULL, diff --git a/lib/accelerated/x86/sha-padlock.c b/lib/accelerated/x86/sha-padlock.c index 1030d4f63e..c40a3e805e 100644 --- a/lib/accelerated/x86/sha-padlock.c +++ b/lib/accelerated/x86/sha-padlock.c @@ -383,7 +383,7 @@ const gnutls_crypto_digest_st _gnutls_sha_padlock = { .fast = wrap_padlock_hash_fast }; -const gnutls_crypto_digest_st _gnutls_sha_padlock_nano = { +const gnutls_crypto_digest_st _gnutls_sha_padlock_enhance = { .init = wrap_padlock_hash_init, .hash = wrap_padlock_hash_update, .output = wrap_padlock_hash_output, diff --git a/lib/accelerated/x86/sha-padlock.h b/lib/accelerated/x86/sha-padlock.h index af67a07dd7..f626f17e1b 100644 --- a/lib/accelerated/x86/sha-padlock.h +++ b/lib/accelerated/x86/sha-padlock.h @@ -30,7 +30,7 @@ extern const struct nettle_hash padlock_sha256; extern const struct nettle_hash padlock_sha384; extern const struct nettle_hash padlock_sha512; -extern const gnutls_crypto_mac_st _gnutls_hmac_sha_padlock_nano; -extern const gnutls_crypto_digest_st _gnutls_sha_padlock_nano; +extern const gnutls_crypto_mac_st _gnutls_hmac_sha_padlock_enhance; +extern const gnutls_crypto_digest_st _gnutls_sha_padlock_enhance; #endif /* GNUTLS_LIB_ACCELERATED_X86_SHA_PADLOCK_H */ diff --git a/lib/accelerated/x86/x86-common.c b/lib/accelerated/x86/x86-common.c index 29410e51fd..33fa40d4a3 100644 --- a/lib/accelerated/x86/x86-common.c +++ b/lib/accelerated/x86/x86-common.c @@ -90,9 +90,9 @@ unsigned int _gnutls_x86_cpuid_s[4]; # define 
bit_MOVBE 0x00400000 #endif -#define via_bit_PADLOCK (0x3 << 6) -#define via_bit_PADLOCK_PHE (0x3 << 10) -#define via_bit_PADLOCK_PHE_SHA512 (0x3 << 25) +#define bit_PADLOCK (0x3 << 6) +#define bit_PADLOCK_PHE (0x3 << 10) +#define bit_PADLOCK_PHE_SHA512 (0x3 << 25) /* Our internal bit-string for cpu capabilities. Should be set * in GNUTLS_CPUID_OVERRIDE */ @@ -102,9 +102,9 @@ unsigned int _gnutls_x86_cpuid_s[4]; #define INTEL_PCLMUL (1<<3) #define INTEL_AVX (1<<4) #define INTEL_SHA (1<<5) -#define VIA_PADLOCK (1<<20) -#define VIA_PADLOCK_PHE (1<<21) -#define VIA_PADLOCK_PHE_SHA512 (1<<22) +#define PADLOCK (1<<20) +#define PADLOCK_PHE (1<<21) +#define PADLOCK_PHE_SHA512 (1<<22) #ifndef HAVE_GET_CPUID_COUNT static inline void @@ -246,39 +246,37 @@ static unsigned check_pclmul(void) #endif #ifdef ENABLE_PADLOCK -static unsigned capabilities_to_via_edx(unsigned capabilities) +static unsigned capabilities_to_zhaoxin_edx(unsigned capabilities) { unsigned a,b,c,t; - memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s)); - if (capabilities & EMPTY_SET) { return 0; } if (!__get_cpuid(1, &t, &a, &b, &c)) return 0; - if (capabilities & VIA_PADLOCK) { - if (c & via_bit_PADLOCK) { - _gnutls_x86_cpuid_s[2] |= via_bit_PADLOCK; + if (capabilities & PADLOCK) { + if (c & bit_PADLOCK) { + _gnutls_x86_cpuid_s[2] |= bit_PADLOCK; } else { _gnutls_debug_log ("Padlock acceleration requested but not available\n"); } } - if (capabilities & VIA_PADLOCK_PHE) { - if (c & via_bit_PADLOCK_PHE) { - _gnutls_x86_cpuid_s[2] |= via_bit_PADLOCK_PHE; + if (capabilities & PADLOCK_PHE) { + if (c & bit_PADLOCK_PHE) { + _gnutls_x86_cpuid_s[2] |= bit_PADLOCK_PHE; } else { _gnutls_debug_log ("Padlock-PHE acceleration requested but not available\n"); } } - if (capabilities & VIA_PADLOCK_PHE_SHA512) { - if (c & via_bit_PADLOCK_PHE_SHA512) { - _gnutls_x86_cpuid_s[2] |= via_bit_PADLOCK_PHE_SHA512; + if (capabilities & PADLOCK_PHE_SHA512) { + if (c & bit_PADLOCK_PHE_SHA512) { + _gnutls_x86_cpuid_s[2] |= 
bit_PADLOCK_PHE_SHA512; } else { _gnutls_debug_log ("Padlock-PHE-SHA512 acceleration requested but not available\n"); @@ -290,18 +288,36 @@ static unsigned capabilities_to_via_edx(unsigned capabilities) static int check_padlock(unsigned edx) { - return ((edx & via_bit_PADLOCK) == via_bit_PADLOCK); + return ((edx & bit_PADLOCK) == bit_PADLOCK); } static int check_phe(unsigned edx) { - return ((edx & via_bit_PADLOCK_PHE) == via_bit_PADLOCK_PHE); + return ((edx & bit_PADLOCK_PHE) == bit_PADLOCK_PHE); } /* We are actually checking for SHA512 */ static int check_phe_sha512(unsigned edx) { - return ((edx & via_bit_PADLOCK_PHE_SHA512) == via_bit_PADLOCK_PHE_SHA512); + return ((edx & bit_PADLOCK_PHE_SHA512) == bit_PADLOCK_PHE_SHA512); +} + +/* On some of the Zhaoxin CPUs, pclmul has a faster acceleration effect */ +static int check_fast_pclmul(void) +{ + unsigned int a,b,c,d; + a = b = c = d = 0; + if (__get_cpuid(1, &a, &b, &c, &d)) + return 0; + + unsigned int family = ((a >> 8) & 0x0F); + unsigned int model = ((a >> 4) & 0x0F) + ((a >> 12) & 0xF0); + + if(((family == 0x6) && (model == 0xf || model == 0x19)) || + ((family == 0x7) && (model == 0x1B || model == 0x3B))) + return 1; + else + return 0; } static int check_phe_partial(void) @@ -326,7 +342,7 @@ static int check_phe_partial(void) return 0; } -static unsigned check_via(void) +static unsigned check_zhaoxin(void) { unsigned int a, b, c, d; @@ -334,7 +350,10 @@ static unsigned check_via(void) return 0; if ((memcmp(&b, "Cent", 4) == 0 && - memcmp(&d, "aurH", 4) == 0 && memcmp(&c, "auls", 4) == 0)) { + memcmp(&d, "aurH", 4) == 0 && + memcmp(&c, "auls", 4) == 0) || + (memcmp(&b, " Sh", 4) == 0 && + memcmp(&d, "angh", 4) == 0 && memcmp(&c, "ai ", 4) == 0)) { return 1; } @@ -347,13 +366,301 @@ void register_x86_padlock_crypto(unsigned capabilities) int ret, phe; unsigned edx; - if (check_via() == 0) + memset(_gnutls_x86_cpuid_s, 0, sizeof(_gnutls_x86_cpuid_s)); + if (check_zhaoxin() == 0) return; - if (capabilities == 0) 
+ if (capabilities == 0){ + if(!read_cpuid_vals(_gnutls_x86_cpuid_s)) + return; edx = padlock_capability(); - else - edx = capabilities_to_via_edx(capabilities); + } else{ + capabilities_to_intel_cpuid(capabilities); + edx = capabilities_to_zhaoxin_edx(capabilities); + } + + if (check_ssse3()) { + _gnutls_debug_log("Zhaoxin SSSE3 was detected\n"); + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_GCM, 90, + &_gnutls_aes_gcm_x86_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_192_GCM, 90, + &_gnutls_aes_gcm_x86_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_GCM, 90, + &_gnutls_aes_gcm_x86_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_CBC, 90, &_gnutls_aes_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_192_CBC, 90, &_gnutls_aes_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_CBC, 90, &_gnutls_aes_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + } + + if (check_sha() || check_ssse3()) { + if (check_sha()) + _gnutls_debug_log("Zhaoxin SHA was detected\n"); + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1, + 80, + &_gnutls_sha_x86_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA224, + 80, + &_gnutls_sha_x86_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256, + 80, + &_gnutls_sha_x86_ssse3, 0); + if (ret < 0) { + gnutls_assert(); + } + + + ret = + gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA1, + 80, + &_gnutls_hmac_sha_x86_ssse3, 0); + if (ret < 0) + gnutls_assert(); + + ret = + 
gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA224, + 80, + &_gnutls_hmac_sha_x86_ssse3, 0); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA256, + 80, + &_gnutls_hmac_sha_x86_ssse3, 0); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA384, + 80, + &_gnutls_sha_x86_ssse3, 0); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA512, + 80, + &_gnutls_sha_x86_ssse3, 0); + if (ret < 0) + gnutls_assert(); + ret = + gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA384, + 80, + &_gnutls_hmac_sha_x86_ssse3, 0); + if (ret < 0) + gnutls_assert(); + + ret = + gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA512, + 80, + &_gnutls_hmac_sha_x86_ssse3, 0); + if (ret < 0) + gnutls_assert(); + } + + if (check_optimized_aes()) { + _gnutls_debug_log("Zhaoxin AES accelerator was detected\n"); + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_CBC, 80, &_gnutls_aesni_x86, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_192_CBC, 80, &_gnutls_aesni_x86, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_CBC, 80, &_gnutls_aesni_x86, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_CCM, 80, + &_gnutls_aes_ccm_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_CCM, 80, + &_gnutls_aes_ccm_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_CCM_8, 80, + &_gnutls_aes_ccm_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_CCM_8, 80, + &_gnutls_aes_ccm_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + + 
ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_XTS, 80, + &_gnutls_aes_xts_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_XTS, 80, + &_gnutls_aes_xts_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + +#ifdef ASM_X86_64 + if (check_pclmul()) { + /* register GCM ciphers */ + _gnutls_debug_log + ("Zhaoxin GCM accelerator was detected\n"); + if (check_avx_movbe() && !check_fast_pclmul()) { + _gnutls_debug_log + ("Zhaoxin GCM accelerator (AVX) was detected\n"); + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_GCM, 80, + &_gnutls_aes_gcm_pclmul_avx, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_192_GCM, 80, + &_gnutls_aes_gcm_pclmul_avx, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_GCM, 80, + &_gnutls_aes_gcm_pclmul_avx, 0); + if (ret < 0) { + gnutls_assert(); + } + } else { + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_GCM, 80, + &_gnutls_aes_gcm_pclmul, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_192_GCM, 80, + &_gnutls_aes_gcm_pclmul, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_GCM, 80, + &_gnutls_aes_gcm_pclmul, 0); + if (ret < 0) { + gnutls_assert(); + } + } + } else +#endif + { + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_128_GCM, 80, + &_gnutls_aes_gcm_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_192_GCM, 80, + &_gnutls_aes_gcm_x86_aesni, 0); + if (ret < 0) { + gnutls_assert(); + } + + ret = + gnutls_crypto_single_cipher_register + (GNUTLS_CIPHER_AES_256_GCM, 80, + &_gnutls_aes_gcm_x86_aesni, 0); + if (ret < 0) { + 
gnutls_assert(); + } + } + } if (check_padlock(edx)) { _gnutls_debug_log @@ -368,7 +675,7 @@ void register_x86_padlock_crypto(unsigned capabilities) /* register GCM ciphers */ ret = gnutls_crypto_single_cipher_register - (GNUTLS_CIPHER_AES_128_GCM, 80, + (GNUTLS_CIPHER_AES_128_GCM, 90, &_gnutls_aes_gcm_padlock, 0); if (ret < 0) { gnutls_assert(); @@ -383,15 +690,16 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_cipher_register - (GNUTLS_CIPHER_AES_256_GCM, 80, + (GNUTLS_CIPHER_AES_256_GCM, 90, &_gnutls_aes_gcm_padlock, 0); if (ret < 0) { gnutls_assert(); } - } else { - _gnutls_priority_update_non_aesni(); } + if(!check_optimized_aes() && !check_padlock(edx)) + _gnutls_priority_update_non_aesni(); + #ifdef HAVE_LIBNETTLE phe = check_phe(edx); @@ -404,7 +712,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_digest_register (GNUTLS_DIG_SHA384, 80, - &_gnutls_sha_padlock_nano, 0); + &_gnutls_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } @@ -412,7 +720,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_digest_register (GNUTLS_DIG_SHA512, 80, - &_gnutls_sha_padlock_nano, 0); + &_gnutls_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } @@ -420,7 +728,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_mac_register (GNUTLS_MAC_SHA384, 80, - &_gnutls_hmac_sha_padlock_nano, 0); + &_gnutls_hmac_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } @@ -428,7 +736,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_mac_register (GNUTLS_MAC_SHA512, 80, - &_gnutls_hmac_sha_padlock_nano, 0); + &_gnutls_hmac_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } @@ -436,32 +744,32 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1, - 80, - &_gnutls_sha_padlock_nano, 0); + 90, + 
&_gnutls_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } ret = gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA224, - 80, - &_gnutls_sha_padlock_nano, 0); + 90, + &_gnutls_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } ret = gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256, - 80, - &_gnutls_sha_padlock_nano, 0); + 90, + &_gnutls_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } ret = gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA1, - 80, - &_gnutls_hmac_sha_padlock_nano, 0); + 90, + &_gnutls_hmac_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } @@ -470,8 +778,8 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA256, - 80, - &_gnutls_hmac_sha_padlock_nano, 0); + 90, + &_gnutls_hmac_sha_padlock_enhance, 0); if (ret < 0) { gnutls_assert(); } @@ -482,7 +790,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ("Padlock SHA1 and SHA256 accelerator was detected\n"); ret = gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA1, - 80, + 90, &_gnutls_sha_padlock, 0); if (ret < 0) { gnutls_assert(); @@ -490,7 +798,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_digest_register(GNUTLS_DIG_SHA256, - 80, + 90, &_gnutls_sha_padlock, 0); if (ret < 0) { gnutls_assert(); @@ -498,7 +806,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA1, - 80, + 90, &_gnutls_hmac_sha_padlock, 0); if (ret < 0) { gnutls_assert(); @@ -506,7 +814,7 @@ void register_x86_padlock_crypto(unsigned capabilities) ret = gnutls_crypto_single_mac_register(GNUTLS_MAC_SHA256, - 80, + 90, &_gnutls_hmac_sha_padlock, 0); if (ret < 0) { gnutls_assert(); |