diff options
Diffstat (limited to 'FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c')
-rw-r--r-- | FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c | 7579 |
1 files changed, 6193 insertions, 1386 deletions
diff --git a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c index 85f01a0d1..4b5b437ca 100644 --- a/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c +++ b/FreeRTOS-Plus/Source/WolfSSL/wolfcrypt/src/aes.c @@ -1,8 +1,8 @@ /* aes.c * - * Copyright (C) 2006-2015 wolfSSL Inc. + * Copyright (C) 2006-2020 wolfSSL Inc. * - * This file is part of wolfSSL. (formerly known as CyaSSL) + * This file is part of wolfSSL. * * wolfSSL is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -16,194 +16,458 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ + #ifdef HAVE_CONFIG_H #include <config.h> #endif #include <wolfssl/wolfcrypt/settings.h> +#include <wolfssl/wolfcrypt/error-crypt.h> -#ifndef NO_AES +#if !defined(NO_AES) -#include <wolfssl/wolfcrypt/aes.h> +/* Tip: Locate the software cipher modes by searching for "Software AES" */ -#ifdef HAVE_FIPS -int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv, - int dir) -{ - return AesSetKey_fips(aes, key, len, iv, dir); -} +#if defined(HAVE_FIPS) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + /* set NO_WRAPPERS before headers, use direct internal f()s not wrappers */ + #define FIPS_NO_WRAPPERS -int wc_AesSetIV(Aes* aes, const byte* iv) -{ - return AesSetIV_fips(aes, iv); -} + #ifdef USE_WINDOWS_API + #pragma code_seg(".fipsA$g") + #pragma const_seg(".fipsB$g") + #endif +#endif +#include <wolfssl/wolfcrypt/aes.h> +#include <wolfssl/wolfcrypt/cpuid.h> -int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) -{ - return AesCbcEncrypt_fips(aes, out, in, sz); -} +#ifdef WOLF_CRYPTO_CB + #include 
<wolfssl/wolfcrypt/cryptocb.h> +#endif -int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) -{ - return AesCbcDecrypt_fips(aes, out, in, sz); -} +/* fips wrapper calls, user can call direct */ +#if defined(HAVE_FIPS) && \ + (!defined(HAVE_FIPS_VERSION) || (HAVE_FIPS_VERSION < 2)) + int wc_AesSetKey(Aes* aes, const byte* key, word32 len, const byte* iv, + int dir) + { + if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) { + return BAD_FUNC_ARG; + } -int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, - const byte* key, word32 keySz, const byte* iv) -{ - return AesCbcDecryptWithKey(out, in, inSz, key, keySz, iv); -} + return AesSetKey_fips(aes, key, len, iv, dir); + } + int wc_AesSetIV(Aes* aes, const byte* iv) + { + if (aes == NULL) { + return BAD_FUNC_ARG; + } + return AesSetIV_fips(aes, iv); + } + #ifdef HAVE_AES_CBC + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } -/* AES-CTR */ -#ifdef WOLFSSL_AES_COUNTER -void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) -{ - AesCtrEncrypt(aes, out, in, sz); -} -#endif + return AesCbcEncrypt_fips(aes, out, in, sz); + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + if (aes == NULL || out == NULL || in == NULL + || sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } -/* AES-DIRECT */ -#if defined(WOLFSSL_AES_DIRECT) -void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) -{ - AesEncryptDirect(aes, out, in); -} + return AesCbcDecrypt_fips(aes, out, in, sz); + } + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AES_CBC */ + /* AES-CTR */ + #ifdef WOLFSSL_AES_COUNTER + int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } -void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) -{ - 
AesDecryptDirect(aes, out, in); -} + return AesCtrEncrypt(aes, out, in, sz); + } + #endif + /* AES-DIRECT */ + #if defined(WOLFSSL_AES_DIRECT) + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + AesEncryptDirect(aes, out, in); + } -int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len, - const byte* iv, int dir) -{ - return AesSetKeyDirect(aes, key, len, iv, dir); -} -#endif + #ifdef HAVE_AES_DECRYPT + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + AesDecryptDirect(aes, out, in); + } + #endif /* HAVE_AES_DECRYPT */ + int wc_AesSetKeyDirect(Aes* aes, const byte* key, word32 len, + const byte* iv, int dir) + { + return AesSetKeyDirect(aes, key, len, iv, dir); + } + #endif /* WOLFSSL_AES_DIRECT */ -#ifdef HAVE_AESGCM -int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) -{ - return AesGcmSetKey_fips(aes, key, len); -} + /* AES-GCM */ + #ifdef HAVE_AESGCM + int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) + { + if (aes == NULL || !( (len == 16) || (len == 24) || (len == 32)) ) { + return BAD_FUNC_ARG; + } + return AesGcmSetKey_fips(aes, key, len); + } + int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) + { + if (aes == NULL || authTagSz > AES_BLOCK_SIZE || + authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ || + ivSz == 0 || ivSz > AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } -int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) -{ - return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz, - authIn, authInSz); -} + return AesGcmEncrypt_fips(aes, out, in, sz, iv, ivSz, authTag, + authTagSz, authIn, authInSz); + } + #ifdef HAVE_AES_DECRYPT + int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, 
word32 authTagSz, + const byte* authIn, word32 authInSz) + { + if (aes == NULL || out == NULL || in == NULL || iv == NULL + || authTag == NULL || authTagSz > AES_BLOCK_SIZE || + ivSz == 0 || ivSz > AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } -int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, - const byte* iv, word32 ivSz, - const byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) -{ - return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag, authTagSz, - authIn, authInSz); -} + return AesGcmDecrypt_fips(aes, out, in, sz, iv, ivSz, authTag, + authTagSz, authIn, authInSz); + } + #endif /* HAVE_AES_DECRYPT */ + int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len) + { + if (gmac == NULL || key == NULL || !((len == 16) || + (len == 24) || (len == 32)) ) { + return BAD_FUNC_ARG; + } -int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len) -{ - return GmacSetKey(gmac, key, len); -} + return GmacSetKey(gmac, key, len); + } + int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz, + const byte* authIn, word32 authInSz, + byte* authTag, word32 authTagSz) + { + if (gmac == NULL || authTagSz > AES_BLOCK_SIZE || + authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { + return BAD_FUNC_ARG; + } + return GmacUpdate(gmac, iv, ivSz, authIn, authInSz, + authTag, authTagSz); + } + #endif /* HAVE_AESGCM */ -int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz, - const byte* authIn, word32 authInSz, - byte* authTag, word32 authTagSz) -{ - return GmacUpdate(gmac, iv, ivSz, authIn, authInSz, - authTag, authTagSz); -} + /* AES-CCM */ + #if defined(HAVE_AESCCM) && \ + defined(HAVE_FIPS_VERSION) && (HAVE_FIPS_VERSION >= 2) + int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz) + { + return AesCcmSetKey(aes, key, keySz); + } + int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) + { + /* sanity check on 
arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; -#endif /* HAVE_AESGCM */ -#ifdef HAVE_AESCCM -void wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz) -{ - AesCcmSetKey(aes, key, keySz); -} + AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, + authTagSz, authIn, authInSz); + return 0; + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCcmDecrypt(Aes* aes, byte* out, + const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) + { -void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, - const byte* nonce, word32 nonceSz, - byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) -{ - AesCcmEncrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, - authIn, authInSz); -} + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) { + return BAD_FUNC_ARG; + } + return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz, + authTag, authTagSz, authIn, authInSz); + } + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AESCCM && HAVE_FIPS_VERSION 2 */ -int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, - const byte* nonce, word32 nonceSz, - const byte* authTag, word32 authTagSz, - const byte* authIn, word32 authInSz) -{ - return AesCcmDecrypt(aes, out, in, inSz, nonce, nonceSz, authTag, authTagSz, - authIn, authInSz); -} -#endif /* HAVE_AESCCM */ + int wc_AesInit(Aes* aes, void* h, int i) + { + if (aes == NULL) + return BAD_FUNC_ARG; -#ifdef HAVE_CAVIUM -int wc_AesInitCavium(Aes* aes, int i) -{ - return AesInitCavium(aes, i); -} + (void)h; + (void)i; + /* FIPS doesn't support: + return AesInit(aes, h, i); */ + return 0; + } + void wc_AesFree(Aes* aes) + { + (void)aes; + /* FIPS doesn't support: + AesFree(aes); */ + } -void wc_AesFreeCavium(Aes* aes) -{ - 
AesFreeCavium(aes); -} -#endif -#else /* HAVE_FIPS */ +#else /* else build without fips, or for FIPS v2 */ -#ifdef WOLFSSL_TI_CRYPT -#include <wolfcrypt/src/port/ti/ti-aes.c> + +#if defined(WOLFSSL_TI_CRYPT) + #include <wolfcrypt/src/port/ti/ti-aes.c> #else -#include <wolfssl/wolfcrypt/error-crypt.h> #include <wolfssl/wolfcrypt/logging.h> + #ifdef NO_INLINE #include <wolfssl/wolfcrypt/misc.h> #else + #define WOLFSSL_MISC_INCLUDED #include <wolfcrypt/src/misc.c> #endif + +#if !defined(WOLFSSL_ARMASM) + +#ifdef WOLFSSL_IMX6_CAAM_BLOB + /* case of possibly not using hardware acceleration for AES but using key + blobs */ + #include <wolfssl/wolfcrypt/port/caam/wolfcaam.h> +#endif + #ifdef DEBUG_AESNI #include <stdio.h> #endif - #ifdef _MSC_VER /* 4127 warning constant while(1) */ #pragma warning(disable: 4127) #endif -#if defined(STM32F2_CRYPTO) - /* STM32F2 hardware AES support for CBC, CTR modes through the STM32F2 - * Standard Peripheral Library. Documentation located in STM32F2xx - * Standard Peripheral Library document (See note in README). 
- * NOTE: no support for AES-GCM/CCM/Direct */ - #include "stm32f2xx.h" - #include "stm32f2xx_cryp.h" +/* Define AES implementation includes and functions */ +#if defined(STM32_CRYPTO) + /* STM32F2/F4/F7/L4 hardware AES support for ECB, CBC, CTR and GCM modes */ + +#if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) || defined(HAVE_AESCCM) + + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + int ret = 0; + #ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; + #else + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + #endif + + #ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_ECB; + #endif + HAL_CRYP_Init(&hcryp); + + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE, + (uint32_t*)outBlock, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESECB_Encrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + } + HAL_CRYP_DeInit(&hcryp); + + #else /* STD_PERI_LIB */ + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + /* reset registers to their default values */ + CRYP_DeInit(); + + /* setup key */ + CRYP_KeyInit(&keyInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; + CRYP_Init(&cryptInit); + + /* enable crypto processor */ + CRYP_Cmd(ENABLE); + + /* flush IN/OUT FIFOs */ + CRYP_FIFOFlush(); + + CRYP_DataIn(*(uint32_t*)&inBlock[0]); + 
CRYP_DataIn(*(uint32_t*)&inBlock[4]); + CRYP_DataIn(*(uint32_t*)&inBlock[8]); + CRYP_DataIn(*(uint32_t*)&inBlock[12]); + + /* wait until the complete message has been processed */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + + *(uint32_t*)&outBlock[0] = CRYP_DataOut(); + *(uint32_t*)&outBlock[4] = CRYP_DataOut(); + *(uint32_t*)&outBlock[8] = CRYP_DataOut(); + *(uint32_t*)&outBlock[12] = CRYP_DataOut(); + + /* disable crypto processor */ + CRYP_Cmd(DISABLE); + #endif /* WOLFSSL_STM32_CUBEMX */ + + return ret; + } +#endif /* WOLFSSL_AES_DIRECT || HAVE_AESGCM || HAVE_AESCCM */ + +#ifdef HAVE_AES_DECRYPT + #if defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESCCM) + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + int ret = 0; + #ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; + #else + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + #endif + + #ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_ECB; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_ECB; + #endif + HAL_CRYP_Init(&hcryp); + + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)inBlock, AES_BLOCK_SIZE, + (uint32_t*)outBlock, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESECB_Decrypt(&hcryp, (uint8_t*)inBlock, AES_BLOCK_SIZE, + outBlock, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + } + HAL_CRYP_DeInit(&hcryp); + + #else /* STD_PERI_LIB */ + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + /* reset registers to their default values */ + CRYP_DeInit(); + + /* set direction and key */ + 
CRYP_KeyInit(&keyInit); + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; + CRYP_Init(&cryptInit); + + /* enable crypto processor */ + CRYP_Cmd(ENABLE); + + /* wait until decrypt key has been initialized */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_ECB; + CRYP_Init(&cryptInit); + + /* enable crypto processor */ + CRYP_Cmd(ENABLE); + + /* flush IN/OUT FIFOs */ + CRYP_FIFOFlush(); + + CRYP_DataIn(*(uint32_t*)&inBlock[0]); + CRYP_DataIn(*(uint32_t*)&inBlock[4]); + CRYP_DataIn(*(uint32_t*)&inBlock[8]); + CRYP_DataIn(*(uint32_t*)&inBlock[12]); + + /* wait until the complete message has been processed */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + + *(uint32_t*)&outBlock[0] = CRYP_DataOut(); + *(uint32_t*)&outBlock[4] = CRYP_DataOut(); + *(uint32_t*)&outBlock[8] = CRYP_DataOut(); + *(uint32_t*)&outBlock[12] = CRYP_DataOut(); + + /* disable crypto processor */ + CRYP_Cmd(DISABLE); + #endif /* WOLFSSL_STM32_CUBEMX */ + + return ret; + } + #endif /* WOLFSSL_AES_DIRECT || HAVE_AESCCM */ +#endif /* HAVE_AES_DECRYPT */ + #elif defined(HAVE_COLDFIRE_SEC) /* Freescale Coldfire SEC support for CBC mode. 
* NOTE: no support for AES-CTR/GCM/CCM/Direct */ @@ -211,34 +475,453 @@ void wc_AesFreeCavium(Aes* aes) #include "sec.h" #include "mcf5475_sec.h" #include "mcf5475_siu.h" +#elif defined(FREESCALE_LTC) + #include "fsl_ltc.h" + #if defined(FREESCALE_LTC_AES_GCM) + #undef NEED_AES_TABLES + #undef GCM_TABLE + #else + /* if LTC doesn't have GCM, use software with LTC AES ECB mode */ + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + wc_AesEncryptDirect(aes, outBlock, inBlock); + return 0; + } + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + wc_AesDecryptDirect(aes, outBlock, inBlock); + return 0; + } + #endif #elif defined(FREESCALE_MMCAU) /* Freescale mmCAU hardware AES support for Direct, CBC, CCM, GCM modes * through the CAU/mmCAU library. Documentation located in * ColdFire/ColdFire+ CAU and Kinetis mmCAU Software Library User - * Guide (See note in README). - * NOTE: no support for AES-CTR */ - #include "cau_api.h" + * Guide (See note in README). 
*/ + #ifdef FREESCALE_MMCAU_CLASSIC + /* MMCAU 1.4 library used with non-KSDK / classic MQX builds */ + #include "cau_api.h" + #else + #include "fsl_mmcau.h" + #endif + + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + int ret; + + #ifdef FREESCALE_MMCAU_CLASSIC + if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) { + WOLFSSL_MSG("Bad cau_aes_encrypt alignment"); + return BAD_ALIGN_E; + } + #endif + + ret = wolfSSL_CryptHwMutexLock(); + if(ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC + cau_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock); + #else + MMCAU_AES_EncryptEcb(inBlock, (byte*)aes->key, aes->rounds, + outBlock); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + #ifdef HAVE_AES_DECRYPT + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + int ret; + + #ifdef FREESCALE_MMCAU_CLASSIC + if ((wolfssl_word)outBlock % WOLFSSL_MMCAU_ALIGNMENT) { + WOLFSSL_MSG("Bad cau_aes_decrypt alignment"); + return BAD_ALIGN_E; + } + #endif + + ret = wolfSSL_CryptHwMutexLock(); + if(ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC + cau_aes_decrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock); + #else + MMCAU_AES_DecryptEcb(inBlock, (byte*)aes->key, aes->rounds, + outBlock); + #endif + wolfSSL_CryptHwMutexUnLock(); + } + return ret; + } + #endif /* HAVE_AES_DECRYPT */ + #elif defined(WOLFSSL_PIC32MZ_CRYPT) - /* NOTE: no support for AES-CCM/Direct */ - #define DEBUG_WOLFSSL - #include "wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h" -#elif defined(HAVE_CAVIUM) - #include <wolfssl/wolfcrypt/logging.h> - #include "cavium_common.h" - - /* still leave SW crypto available */ + + #include <wolfssl/wolfcrypt/port/pic32/pic32mz-crypt.h> + + #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0, + outBlock, inBlock, AES_BLOCK_SIZE, + PIC32_ENCRYPTION, PIC32_ALGO_AES, 
PIC32_CRYPTOALGO_RECB); + } + #endif + + #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return wc_Pic32AesCrypt(aes->key, aes->keylen, NULL, 0, + outBlock, inBlock, AES_BLOCK_SIZE, + PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RECB); + } + #endif + +#elif defined(WOLFSSL_NRF51_AES) + /* Use built-in AES hardware - AES 128 ECB Encrypt Only */ + #include "wolfssl/wolfcrypt/port/nrf51.h" + + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return nrf51_aes_encrypt(inBlock, (byte*)aes->key, aes->rounds, outBlock); + } + + #ifdef HAVE_AES_DECRYPT + #error nRF51 AES Hardware does not support decrypt + #endif /* HAVE_AES_DECRYPT */ + +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + #include "wolfssl/wolfcrypt/port/Espressif/esp32-crypt.h" + + #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return wc_esp32AesEncrypt(aes, inBlock, outBlock); + } + #endif + + #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return wc_esp32AesDecrypt(aes, inBlock, outBlock); + } + #endif + +#elif defined(WOLFSSL_AESNI) + #define NEED_AES_TABLES - static int wc_AesCaviumSetKey(Aes* aes, const byte* key, word32 length, - const byte* iv); - static int wc_AesCaviumCbcEncrypt(Aes* aes, byte* out, const byte* in, - word32 length); - static int wc_AesCaviumCbcDecrypt(Aes* aes, byte* out, const byte* in, - word32 length); + /* Each platform needs to query info type 1 from cpuid to see if aesni is + * supported. 
Also, let's setup a macro for proper linkage w/o ABI conflicts + */ + + #ifndef AESNI_ALIGN + #define AESNI_ALIGN 16 + #endif + + #ifdef _MSC_VER + #define XASM_LINK(f) + #elif defined(__APPLE__) + #define XASM_LINK(f) asm("_" f) + #else + #define XASM_LINK(f) asm(f) + #endif /* _MSC_VER */ + + static int checkAESNI = 0; + static int haveAESNI = 0; + static word32 intel_flags = 0; + + static int Check_CPU_support_AES(void) + { + intel_flags = cpuid_get_flags(); + + return IS_INTEL_AESNI(intel_flags) != 0; + } + + + /* tell C compiler these are asm functions in case any mix up of ABI underscore + prefix between clang/gcc/llvm etc */ + #ifdef HAVE_AES_CBC + void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, + unsigned char* ivec, unsigned long length, + const unsigned char* KS, int nr) + XASM_LINK("AES_CBC_encrypt"); + + #ifdef HAVE_AES_DECRYPT + #if defined(WOLFSSL_AESNI_BY4) + void AES_CBC_decrypt_by4(const unsigned char* in, unsigned char* out, + unsigned char* ivec, unsigned long length, + const unsigned char* KS, int nr) + XASM_LINK("AES_CBC_decrypt_by4"); + #elif defined(WOLFSSL_AESNI_BY6) + void AES_CBC_decrypt_by6(const unsigned char* in, unsigned char* out, + unsigned char* ivec, unsigned long length, + const unsigned char* KS, int nr) + XASM_LINK("AES_CBC_decrypt_by6"); + #else /* WOLFSSL_AESNI_BYx */ + void AES_CBC_decrypt_by8(const unsigned char* in, unsigned char* out, + unsigned char* ivec, unsigned long length, + const unsigned char* KS, int nr) + XASM_LINK("AES_CBC_decrypt_by8"); + #endif /* WOLFSSL_AESNI_BYx */ + #endif /* HAVE_AES_DECRYPT */ + #endif /* HAVE_AES_CBC */ + + void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, + unsigned long length, const unsigned char* KS, int nr) + XASM_LINK("AES_ECB_encrypt"); + + #ifdef HAVE_AES_DECRYPT + void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, + unsigned long length, const unsigned char* KS, int nr) + XASM_LINK("AES_ECB_decrypt"); + #endif + + void 
AES_128_Key_Expansion(const unsigned char* userkey, + unsigned char* key_schedule) + XASM_LINK("AES_128_Key_Expansion"); + + void AES_192_Key_Expansion(const unsigned char* userkey, + unsigned char* key_schedule) + XASM_LINK("AES_192_Key_Expansion"); + + void AES_256_Key_Expansion(const unsigned char* userkey, + unsigned char* key_schedule) + XASM_LINK("AES_256_Key_Expansion"); + + + static int AES_set_encrypt_key(const unsigned char *userKey, const int bits, + Aes* aes) + { + int ret; + + if (!userKey || !aes) + return BAD_FUNC_ARG; + + switch (bits) { + case 128: + AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10; + return 0; + case 192: + AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12; + return 0; + case 256: + AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14; + return 0; + default: + ret = BAD_FUNC_ARG; + } + + return ret; + } + + #ifdef HAVE_AES_DECRYPT + static int AES_set_decrypt_key(const unsigned char* userKey, + const int bits, Aes* aes) + { + int nr; + Aes temp_key; + __m128i *Key_Schedule = (__m128i*)aes->key; + __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key; + + if (!userKey || !aes) + return BAD_FUNC_ARG; + + if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG) + return BAD_FUNC_ARG; + + nr = temp_key.rounds; + aes->rounds = nr; + + Key_Schedule[nr] = Temp_Key_Schedule[0]; + Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]); + Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]); + Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]); + Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]); + Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]); + Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]); + Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]); + Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]); + Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]); + + if (nr>10) { + Key_Schedule[nr-10] = 
_mm_aesimc_si128(Temp_Key_Schedule[10]); + Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]); + } + + if (nr>12) { + Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]); + Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]); + } + + Key_Schedule[0] = Temp_Key_Schedule[nr]; + + return 0; + } + #endif /* HAVE_AES_DECRYPT */ + +#elif (defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES)) || \ + ((defined(WOLFSSL_AFALG) || defined(WOLFSSL_DEVCRYPTO_AES)) && \ + defined(HAVE_AESCCM)) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + wc_AesEncryptDirect(aes, outBlock, inBlock); + return 0; + } + +#elif defined(WOLFSSL_AFALG) +#elif defined(WOLFSSL_DEVCRYPTO_AES) + +#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + #include "hal_data.h" + + #ifndef WOLFSSL_SCE_AES256_HANDLE + #define WOLFSSL_SCE_AES256_HANDLE g_sce_aes_256 + #endif + + #ifndef WOLFSSL_SCE_AES192_HANDLE + #define WOLFSSL_SCE_AES192_HANDLE g_sce_aes_192 + #endif + + #ifndef WOLFSSL_SCE_AES128_HANDLE + #define WOLFSSL_SCE_AES128_HANDLE g_sce_aes_128 + #endif + + static int AES_ECB_encrypt(Aes* aes, const byte* inBlock, byte* outBlock, + int sz) + { + uint32_t ret; + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + + switch (aes->keylen) { + #ifdef WOLFSSL_AES_128 + case AES_128_KEY_SIZE: + ret = WOLFSSL_SCE_AES128_HANDLE.p_api->encrypt( + WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key, + NULL, (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES_192_KEY_SIZE: + ret = WOLFSSL_SCE_AES192_HANDLE.p_api->encrypt( + WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key, + NULL, (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES_256_KEY_SIZE: + ret = WOLFSSL_SCE_AES256_HANDLE.p_api->encrypt( + WOLFSSL_SCE_AES256_HANDLE.p_ctrl, 
aes->key, + NULL, (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + default: + WOLFSSL_MSG("Unknown key size"); + return BAD_FUNC_ARG; + } + + if (ret != SSP_SUCCESS) { + /* revert input */ + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + return WC_HW_E; + } + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz); + if (inBlock != outBlock) { + /* revert input */ + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + } + return 0; + } + + #if defined(HAVE_AES_DECRYPT) + static int AES_ECB_decrypt(Aes* aes, const byte* inBlock, byte* outBlock, + int sz) + { + uint32_t ret; + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + + switch (aes->keylen) { + #ifdef WOLFSSL_AES_128 + case AES_128_KEY_SIZE: + ret = WOLFSSL_SCE_AES128_HANDLE.p_api->decrypt( + WOLFSSL_SCE_AES128_HANDLE.p_ctrl, aes->key, aes->reg, + (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_192 + case AES_192_KEY_SIZE: + ret = WOLFSSL_SCE_AES192_HANDLE.p_api->decrypt( + WOLFSSL_SCE_AES192_HANDLE.p_ctrl, aes->key, aes->reg, + (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + #ifdef WOLFSSL_AES_256 + case AES_256_KEY_SIZE: + ret = WOLFSSL_SCE_AES256_HANDLE.p_api->decrypt( + WOLFSSL_SCE_AES256_HANDLE.p_ctrl, aes->key, aes->reg, + (sz / sizeof(word32)), (word32*)inBlock, + (word32*)outBlock); + break; + #endif + default: + WOLFSSL_MSG("Unknown key size"); + return BAD_FUNC_ARG; + } + if (ret != SSP_SUCCESS) { + return WC_HW_E; + } + + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == + CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords((word32*)outBlock, (word32*)outBlock, sz); + if (inBlock != outBlock) { + /* revert input */ + ByteReverseWords((word32*)inBlock, (word32*)inBlock, sz); + } + } + 
+ return 0; + } + + #endif + + #if defined(HAVE_AESGCM) || defined(WOLFSSL_AES_DIRECT) + static int wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); + } + #endif + + #if defined(HAVE_AES_DECRYPT) && defined(WOLFSSL_AES_DIRECT) + static int wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) + { + return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); + } + #endif #else - /* using CTaoCrypt software AES implementation */ + + /* using wolfCrypt software implementation */ #define NEED_AES_TABLES -#endif /* STM32F2_CRYPTO */ +#endif + #ifdef NEED_AES_TABLES @@ -250,7 +933,8 @@ static const word32 rcon[] = { /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */ }; -static const word32 Te[5][256] = { +#ifndef WOLFSSL_AES_SMALL_TABLES +static const word32 Te[4][256] = { { 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU, 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U, @@ -514,76 +1198,11 @@ static const word32 Te[5][256] = { 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U, 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU, 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU, -}, -{ - 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU, - 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U, - 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU, - 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U, - 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU, - 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U, - 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU, - 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U, - 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U, - 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU, - 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U, - 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U, - 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U, - 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU, - 0x07070707U, 
0x12121212U, 0x80808080U, 0xe2e2e2e2U, - 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U, - 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU, - 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U, - 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U, - 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U, - 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU, - 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU, - 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U, - 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU, - 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU, - 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U, - 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU, - 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U, - 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU, - 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U, - 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U, - 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U, - 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU, - 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U, - 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU, - 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U, - 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU, - 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U, - 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U, - 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU, - 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU, - 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU, - 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U, - 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U, - 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU, - 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U, - 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU, - 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U, - 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU, - 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U, - 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU, - 0x4b4b4b4bU, 
0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU, - 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U, - 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU, - 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U, - 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU, - 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U, - 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U, - 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U, - 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU, - 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU, - 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U, - 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU, - 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U, } }; -static const word32 Td[5][256] = { +#ifdef HAVE_AES_DECRYPT +static const word32 Td[4][256] = { { 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U, 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U, @@ -848,232 +1467,166 @@ static const word32 Td[5][256] = { 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU, 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U, 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U, -}, -{ - 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U, - 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U, - 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU, - 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU, - 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U, - 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U, - 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U, - 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU, - 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U, - 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU, - 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU, - 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU, - 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U, - 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U, - 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U, - 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U, - 0x72727272U, 
0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U, - 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U, - 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU, - 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U, - 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U, - 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU, - 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U, - 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U, - 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U, - 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU, - 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U, - 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U, - 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU, - 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U, - 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U, - 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU, - 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U, - 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU, - 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU, - 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U, - 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U, - 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U, - 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U, - 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU, - 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U, - 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U, - 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU, - 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU, - 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU, - 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U, - 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU, - 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U, - 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U, - 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U, - 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U, - 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU, - 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U, - 0x19191919U, 
0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU, - 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU, - 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU, - 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU, - 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U, - 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU, - 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U, - 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU, - 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U, - 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U, - 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU, } }; +#endif /* HAVE_AES_DECRYPT */ +#endif -#define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y)))) - -#ifdef WOLFSSL_AESNI - -/* Each platform needs to query info type 1 from cpuid to see if aesni is - * supported. Also, let's setup a macro for proper linkage w/o ABI conflicts - */ - -#ifndef _MSC_VER - - #define cpuid(reg, func)\ - __asm__ __volatile__ ("cpuid":\ - "=a" (reg[0]), "=b" (reg[1]), "=c" (reg[2]), "=d" (reg[3]) :\ - "a" (func)); - - #define XASM_LINK(f) asm(f) -#else - - #include <intrin.h> - #define cpuid(a,b) __cpuid((int*)a,b) +#ifdef HAVE_AES_DECRYPT +#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) \ + || defined(WOLFSSL_AES_DIRECT) +static const byte Td4[256] = +{ + 0x52U, 0x09U, 0x6aU, 0xd5U, 0x30U, 0x36U, 0xa5U, 0x38U, + 0xbfU, 0x40U, 0xa3U, 0x9eU, 0x81U, 0xf3U, 0xd7U, 0xfbU, + 0x7cU, 0xe3U, 0x39U, 0x82U, 0x9bU, 0x2fU, 0xffU, 0x87U, + 0x34U, 0x8eU, 0x43U, 0x44U, 0xc4U, 0xdeU, 0xe9U, 0xcbU, + 0x54U, 0x7bU, 0x94U, 0x32U, 0xa6U, 0xc2U, 0x23U, 0x3dU, + 0xeeU, 0x4cU, 0x95U, 0x0bU, 0x42U, 0xfaU, 0xc3U, 0x4eU, + 0x08U, 0x2eU, 0xa1U, 0x66U, 0x28U, 0xd9U, 0x24U, 0xb2U, + 0x76U, 0x5bU, 0xa2U, 0x49U, 0x6dU, 0x8bU, 0xd1U, 0x25U, + 0x72U, 0xf8U, 0xf6U, 0x64U, 0x86U, 0x68U, 0x98U, 0x16U, + 0xd4U, 0xa4U, 0x5cU, 0xccU, 0x5dU, 0x65U, 0xb6U, 0x92U, + 0x6cU, 0x70U, 0x48U, 0x50U, 0xfdU, 0xedU, 0xb9U, 0xdaU, + 0x5eU, 0x15U, 0x46U, 0x57U, 0xa7U, 0x8dU, 0x9dU, 0x84U, + 0x90U, 0xd8U, 
0xabU, 0x00U, 0x8cU, 0xbcU, 0xd3U, 0x0aU, + 0xf7U, 0xe4U, 0x58U, 0x05U, 0xb8U, 0xb3U, 0x45U, 0x06U, + 0xd0U, 0x2cU, 0x1eU, 0x8fU, 0xcaU, 0x3fU, 0x0fU, 0x02U, + 0xc1U, 0xafU, 0xbdU, 0x03U, 0x01U, 0x13U, 0x8aU, 0x6bU, + 0x3aU, 0x91U, 0x11U, 0x41U, 0x4fU, 0x67U, 0xdcU, 0xeaU, + 0x97U, 0xf2U, 0xcfU, 0xceU, 0xf0U, 0xb4U, 0xe6U, 0x73U, + 0x96U, 0xacU, 0x74U, 0x22U, 0xe7U, 0xadU, 0x35U, 0x85U, + 0xe2U, 0xf9U, 0x37U, 0xe8U, 0x1cU, 0x75U, 0xdfU, 0x6eU, + 0x47U, 0xf1U, 0x1aU, 0x71U, 0x1dU, 0x29U, 0xc5U, 0x89U, + 0x6fU, 0xb7U, 0x62U, 0x0eU, 0xaaU, 0x18U, 0xbeU, 0x1bU, + 0xfcU, 0x56U, 0x3eU, 0x4bU, 0xc6U, 0xd2U, 0x79U, 0x20U, + 0x9aU, 0xdbU, 0xc0U, 0xfeU, 0x78U, 0xcdU, 0x5aU, 0xf4U, + 0x1fU, 0xddU, 0xa8U, 0x33U, 0x88U, 0x07U, 0xc7U, 0x31U, + 0xb1U, 0x12U, 0x10U, 0x59U, 0x27U, 0x80U, 0xecU, 0x5fU, + 0x60U, 0x51U, 0x7fU, 0xa9U, 0x19U, 0xb5U, 0x4aU, 0x0dU, + 0x2dU, 0xe5U, 0x7aU, 0x9fU, 0x93U, 0xc9U, 0x9cU, 0xefU, + 0xa0U, 0xe0U, 0x3bU, 0x4dU, 0xaeU, 0x2aU, 0xf5U, 0xb0U, + 0xc8U, 0xebU, 0xbbU, 0x3cU, 0x83U, 0x53U, 0x99U, 0x61U, + 0x17U, 0x2bU, 0x04U, 0x7eU, 0xbaU, 0x77U, 0xd6U, 0x26U, + 0xe1U, 0x69U, 0x14U, 0x63U, 0x55U, 0x21U, 0x0cU, 0x7dU, +}; +#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */ +#endif /* HAVE_AES_DECRYPT */ - #define XASM_LINK(f) +#define GETBYTE(x, y) (word32)((byte)((x) >> (8 * (y)))) -#endif /* _MSC_VER */ +#ifdef WOLFSSL_AES_SMALL_TABLES +static const byte Tsbox[256] = { + 0x63U, 0x7cU, 0x77U, 0x7bU, 0xf2U, 0x6bU, 0x6fU, 0xc5U, + 0x30U, 0x01U, 0x67U, 0x2bU, 0xfeU, 0xd7U, 0xabU, 0x76U, + 0xcaU, 0x82U, 0xc9U, 0x7dU, 0xfaU, 0x59U, 0x47U, 0xf0U, + 0xadU, 0xd4U, 0xa2U, 0xafU, 0x9cU, 0xa4U, 0x72U, 0xc0U, + 0xb7U, 0xfdU, 0x93U, 0x26U, 0x36U, 0x3fU, 0xf7U, 0xccU, + 0x34U, 0xa5U, 0xe5U, 0xf1U, 0x71U, 0xd8U, 0x31U, 0x15U, + 0x04U, 0xc7U, 0x23U, 0xc3U, 0x18U, 0x96U, 0x05U, 0x9aU, + 0x07U, 0x12U, 0x80U, 0xe2U, 0xebU, 0x27U, 0xb2U, 0x75U, + 0x09U, 0x83U, 0x2cU, 0x1aU, 0x1bU, 0x6eU, 0x5aU, 0xa0U, + 0x52U, 0x3bU, 0xd6U, 0xb3U, 0x29U, 0xe3U, 0x2fU, 0x84U, + 0x53U, 0xd1U, 
0x00U, 0xedU, 0x20U, 0xfcU, 0xb1U, 0x5bU, + 0x6aU, 0xcbU, 0xbeU, 0x39U, 0x4aU, 0x4cU, 0x58U, 0xcfU, + 0xd0U, 0xefU, 0xaaU, 0xfbU, 0x43U, 0x4dU, 0x33U, 0x85U, + 0x45U, 0xf9U, 0x02U, 0x7fU, 0x50U, 0x3cU, 0x9fU, 0xa8U, + 0x51U, 0xa3U, 0x40U, 0x8fU, 0x92U, 0x9dU, 0x38U, 0xf5U, + 0xbcU, 0xb6U, 0xdaU, 0x21U, 0x10U, 0xffU, 0xf3U, 0xd2U, + 0xcdU, 0x0cU, 0x13U, 0xecU, 0x5fU, 0x97U, 0x44U, 0x17U, + 0xc4U, 0xa7U, 0x7eU, 0x3dU, 0x64U, 0x5dU, 0x19U, 0x73U, + 0x60U, 0x81U, 0x4fU, 0xdcU, 0x22U, 0x2aU, 0x90U, 0x88U, + 0x46U, 0xeeU, 0xb8U, 0x14U, 0xdeU, 0x5eU, 0x0bU, 0xdbU, + 0xe0U, 0x32U, 0x3aU, 0x0aU, 0x49U, 0x06U, 0x24U, 0x5cU, + 0xc2U, 0xd3U, 0xacU, 0x62U, 0x91U, 0x95U, 0xe4U, 0x79U, + 0xe7U, 0xc8U, 0x37U, 0x6dU, 0x8dU, 0xd5U, 0x4eU, 0xa9U, + 0x6cU, 0x56U, 0xf4U, 0xeaU, 0x65U, 0x7aU, 0xaeU, 0x08U, + 0xbaU, 0x78U, 0x25U, 0x2eU, 0x1cU, 0xa6U, 0xb4U, 0xc6U, + 0xe8U, 0xddU, 0x74U, 0x1fU, 0x4bU, 0xbdU, 0x8bU, 0x8aU, + 0x70U, 0x3eU, 0xb5U, 0x66U, 0x48U, 0x03U, 0xf6U, 0x0eU, + 0x61U, 0x35U, 0x57U, 0xb9U, 0x86U, 0xc1U, 0x1dU, 0x9eU, + 0xe1U, 0xf8U, 0x98U, 0x11U, 0x69U, 0xd9U, 0x8eU, 0x94U, + 0x9bU, 0x1eU, 0x87U, 0xe9U, 0xceU, 0x55U, 0x28U, 0xdfU, + 0x8cU, 0xa1U, 0x89U, 0x0dU, 0xbfU, 0xe6U, 0x42U, 0x68U, + 0x41U, 0x99U, 0x2dU, 0x0fU, 0xb0U, 0x54U, 0xbbU, 0x16U +}; +#define AES_XTIME(x) ((byte)((byte)((x) << 1) ^ ((0 - ((x) >> 7)) & 0x1b))) -static int Check_CPU_support_AES(void) +static word32 col_mul(word32 t, int i2, int i3, int ia, int ib) { - unsigned int reg[4]; /* put a,b,c,d into 0,1,2,3 */ - cpuid(reg, 1); /* query info 1 */ - - if (reg[2] & 0x2000000) - return 1; + byte t3 = GETBYTE(t, i3); + byte tm = AES_XTIME(GETBYTE(t, i2) ^ t3); - return 0; + return GETBYTE(t, ia) ^ GETBYTE(t, ib) ^ t3 ^ tm; } -static int checkAESNI = 0; -static int haveAESNI = 0; - - -/* tell C compiler these are asm functions in case any mix up of ABI underscore - prefix between clang/gcc/llvm etc */ -void AES_CBC_encrypt(const unsigned char* in, unsigned char* out, - unsigned char* ivec, unsigned long 
length, - const unsigned char* KS, int nr) - XASM_LINK("AES_CBC_encrypt"); - - -void AES_CBC_decrypt(const unsigned char* in, unsigned char* out, - unsigned char* ivec, unsigned long length, - const unsigned char* KS, int nr) - XASM_LINK("AES_CBC_decrypt"); - -void AES_ECB_encrypt(const unsigned char* in, unsigned char* out, - unsigned long length, const unsigned char* KS, int nr) - XASM_LINK("AES_ECB_encrypt"); - - -void AES_ECB_decrypt(const unsigned char* in, unsigned char* out, - unsigned long length, const unsigned char* KS, int nr) - XASM_LINK("AES_ECB_decrypt"); - -void AES_128_Key_Expansion(const unsigned char* userkey, - unsigned char* key_schedule) - XASM_LINK("AES_128_Key_Expansion"); +static word32 inv_col_mul(word32 t, int i9, int ib, int id, int ie) +{ + byte t9 = GETBYTE(t, i9); + byte tb = GETBYTE(t, ib); + byte td = GETBYTE(t, id); + byte te = GETBYTE(t, ie); + byte t0 = t9 ^ tb ^ td; + return t0 ^ AES_XTIME(AES_XTIME(AES_XTIME(t0 ^ te) ^ td ^ te) ^ tb ^ te); +} +#endif -void AES_192_Key_Expansion(const unsigned char* userkey, - unsigned char* key_schedule) - XASM_LINK("AES_192_Key_Expansion"); +#if defined(HAVE_AES_CBC) || defined(WOLFSSL_AES_DIRECT) || defined(HAVE_AESGCM) -void AES_256_Key_Expansion(const unsigned char* userkey, - unsigned char* key_schedule) - XASM_LINK("AES_256_Key_Expansion"); +#ifndef WC_CACHE_LINE_SZ + #if defined(__x86_64__) || defined(_M_X64) || \ + (defined(__ILP32__) && (__ILP32__ >= 1)) + #define WC_CACHE_LINE_SZ 64 + #else + /* default cache line size */ + #define WC_CACHE_LINE_SZ 32 + #endif +#endif -static int AES_set_encrypt_key(const unsigned char *userKey, const int bits, - Aes* aes) +#ifndef WOLFSSL_AES_SMALL_TABLES +/* load 4 Te Tables into cache by cache line stride */ +static WC_INLINE word32 PreFetchTe(void) { - if (!userKey || !aes) - return BAD_FUNC_ARG; + word32 x = 0; + int i,j; - if (bits == 128) { - AES_128_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 10; - return 0; - } - else if (bits == 
192) { - AES_192_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 12; - return 0; - } - else if (bits == 256) { - AES_256_Key_Expansion (userKey,(byte*)aes->key); aes->rounds = 14; - return 0; + for (i = 0; i < 4; i++) { + /* 256 elements, each one is 4 bytes */ + for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) { + x &= Te[i][j]; + } } - return BAD_FUNC_ARG; + return x; } - - -static int AES_set_decrypt_key(const unsigned char* userKey, const int bits, - Aes* aes) +#else +/* load sbox into cache by cache line stride */ +static WC_INLINE word32 PreFetchSBox(void) { - int nr; - Aes temp_key; - __m128i *Key_Schedule = (__m128i*)aes->key; - __m128i *Temp_Key_Schedule = (__m128i*)temp_key.key; - - if (!userKey || !aes) - return BAD_FUNC_ARG; - - if (AES_set_encrypt_key(userKey,bits,&temp_key) == BAD_FUNC_ARG) - return BAD_FUNC_ARG; - - nr = temp_key.rounds; - aes->rounds = nr; - - Key_Schedule[nr] = Temp_Key_Schedule[0]; - Key_Schedule[nr-1] = _mm_aesimc_si128(Temp_Key_Schedule[1]); - Key_Schedule[nr-2] = _mm_aesimc_si128(Temp_Key_Schedule[2]); - Key_Schedule[nr-3] = _mm_aesimc_si128(Temp_Key_Schedule[3]); - Key_Schedule[nr-4] = _mm_aesimc_si128(Temp_Key_Schedule[4]); - Key_Schedule[nr-5] = _mm_aesimc_si128(Temp_Key_Schedule[5]); - Key_Schedule[nr-6] = _mm_aesimc_si128(Temp_Key_Schedule[6]); - Key_Schedule[nr-7] = _mm_aesimc_si128(Temp_Key_Schedule[7]); - Key_Schedule[nr-8] = _mm_aesimc_si128(Temp_Key_Schedule[8]); - Key_Schedule[nr-9] = _mm_aesimc_si128(Temp_Key_Schedule[9]); - - if(nr>10) { - Key_Schedule[nr-10] = _mm_aesimc_si128(Temp_Key_Schedule[10]); - Key_Schedule[nr-11] = _mm_aesimc_si128(Temp_Key_Schedule[11]); - } + word32 x = 0; + int i; - if(nr>12) { - Key_Schedule[nr-12] = _mm_aesimc_si128(Temp_Key_Schedule[12]); - Key_Schedule[nr-13] = _mm_aesimc_si128(Temp_Key_Schedule[13]); + for (i = 0; i < 256; i += WC_CACHE_LINE_SZ/4) { + x &= Tsbox[i]; } - - Key_Schedule[0] = Temp_Key_Schedule[nr]; - - return 0; + return x; } +#endif - - -#endif /* WOLFSSL_AESNI 
*/ - - +/* Software AES - ECB Encrypt */ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) { word32 s0, s1, s2, s3; word32 t0, t1, t2, t3; word32 r = aes->rounds >> 1; - const word32* rk = aes->key; + if (r > 7 || r == 0) { WOLFSSL_MSG("AesEncrypt encountered improper key, set it up"); - return; /* stop instead of segfaulting, set up your keys! */ + return; /* stop instead of seg-faulting, set up your keys! */ } + #ifdef WOLFSSL_AESNI if (haveAESNI && aes->use_aesni) { #ifdef DEBUG_AESNI @@ -1086,17 +1639,20 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) #endif /* check alignment, decrypt doesn't need alignment */ - if ((wolfssl_word)inBlock % 16) { + if ((wolfssl_word)inBlock % AESNI_ALIGN) { #ifndef NO_WOLFSSL_ALLOC_ALIGN - byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE, NULL, + byte* tmp = (byte*)XMALLOC(AES_BLOCK_SIZE + AESNI_ALIGN, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + byte* tmp_align; if (tmp == NULL) return; - XMEMCPY(tmp, inBlock, AES_BLOCK_SIZE); - AES_ECB_encrypt(tmp, tmp, AES_BLOCK_SIZE, (byte*)aes->key, - aes->rounds); - XMEMCPY(outBlock, tmp, AES_BLOCK_SIZE); - XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER); + tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN)); + + XMEMCPY(tmp_align, inBlock, AES_BLOCK_SIZE); + AES_ECB_encrypt(tmp_align, tmp_align, AES_BLOCK_SIZE, + (byte*)aes->key, aes->rounds); + XMEMCPY(outBlock, tmp_align, AES_BLOCK_SIZE); + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); return; #else WOLFSSL_MSG("AES-ECB encrypt with bad alignment"); @@ -1115,6 +1671,10 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) #endif } #endif +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + AES_ECB_encrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); + return; +#endif /* * map byte array block to cipher state @@ -1125,46 +1685,50 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2)); XMEMCPY(&s3, 
inBlock + 3 * sizeof(s0), sizeof(s3)); - #ifdef LITTLE_ENDIAN_ORDER - s0 = ByteReverseWord32(s0); - s1 = ByteReverseWord32(s1); - s2 = ByteReverseWord32(s2); - s3 = ByteReverseWord32(s3); - #endif +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif + /* AddRoundKey */ s0 ^= rk[0]; s1 ^= rk[1]; s2 ^= rk[2]; s3 ^= rk[3]; +#ifndef WOLFSSL_AES_SMALL_TABLES + s0 |= PreFetchTe(); + /* * Nr - 1 full rounds: */ for (;;) { t0 = - Te[0][GETBYTE(s0, 3)] ^ - Te[1][GETBYTE(s1, 2)] ^ - Te[2][GETBYTE(s2, 1)] ^ - Te[3][GETBYTE(s3, 0)] ^ + Te[0][GETBYTE(s0, 3)] ^ + Te[1][GETBYTE(s1, 2)] ^ + Te[2][GETBYTE(s2, 1)] ^ + Te[3][GETBYTE(s3, 0)] ^ rk[4]; t1 = - Te[0][GETBYTE(s1, 3)] ^ - Te[1][GETBYTE(s2, 2)] ^ - Te[2][GETBYTE(s3, 1)] ^ - Te[3][GETBYTE(s0, 0)] ^ + Te[0][GETBYTE(s1, 3)] ^ + Te[1][GETBYTE(s2, 2)] ^ + Te[2][GETBYTE(s3, 1)] ^ + Te[3][GETBYTE(s0, 0)] ^ rk[5]; t2 = Te[0][GETBYTE(s2, 3)] ^ - Te[1][GETBYTE(s3, 2)] ^ - Te[2][GETBYTE(s0, 1)] ^ - Te[3][GETBYTE(s1, 0)] ^ + Te[1][GETBYTE(s3, 2)] ^ + Te[2][GETBYTE(s0, 1)] ^ + Te[3][GETBYTE(s1, 0)] ^ rk[6]; t3 = Te[0][GETBYTE(s3, 3)] ^ - Te[1][GETBYTE(s0, 2)] ^ - Te[2][GETBYTE(s1, 1)] ^ - Te[3][GETBYTE(s2, 0)] ^ + Te[1][GETBYTE(s0, 2)] ^ + Te[2][GETBYTE(s1, 1)] ^ + Te[3][GETBYTE(s2, 0)] ^ rk[7]; rk += 8; @@ -1204,44 +1768,158 @@ static void wc_AesEncrypt(Aes* aes, const byte* inBlock, byte* outBlock) */ s0 = - (Te[4][GETBYTE(t0, 3)] & 0xff000000) ^ - (Te[4][GETBYTE(t1, 2)] & 0x00ff0000) ^ - (Te[4][GETBYTE(t2, 1)] & 0x0000ff00) ^ - (Te[4][GETBYTE(t3, 0)] & 0x000000ff) ^ + (Te[2][GETBYTE(t0, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(t1, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(t2, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t3, 0)] & 0x000000ff) ^ rk[0]; s1 = - (Te[4][GETBYTE(t1, 3)] & 0xff000000) ^ - (Te[4][GETBYTE(t2, 2)] & 0x00ff0000) ^ - (Te[4][GETBYTE(t3, 1)] & 0x0000ff00) ^ - (Te[4][GETBYTE(t0, 0)] & 0x000000ff) ^ + (Te[2][GETBYTE(t1, 3)] & 
0xff000000) ^ + (Te[3][GETBYTE(t2, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(t3, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t0, 0)] & 0x000000ff) ^ rk[1]; s2 = - (Te[4][GETBYTE(t2, 3)] & 0xff000000) ^ - (Te[4][GETBYTE(t3, 2)] & 0x00ff0000) ^ - (Te[4][GETBYTE(t0, 1)] & 0x0000ff00) ^ - (Te[4][GETBYTE(t1, 0)] & 0x000000ff) ^ + (Te[2][GETBYTE(t2, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(t3, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(t0, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t1, 0)] & 0x000000ff) ^ rk[2]; s3 = - (Te[4][GETBYTE(t3, 3)] & 0xff000000) ^ - (Te[4][GETBYTE(t0, 2)] & 0x00ff0000) ^ - (Te[4][GETBYTE(t1, 1)] & 0x0000ff00) ^ - (Te[4][GETBYTE(t2, 0)] & 0x000000ff) ^ + (Te[2][GETBYTE(t3, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(t0, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(t1, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(t2, 0)] & 0x000000ff) ^ rk[3]; +#else + s0 |= PreFetchSBox(); + + r *= 2; + /* Two rounds at a time */ + for (rk += 4; r > 1; r--, rk += 4) { + t0 = + ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s3, 0)]); + t1 = + ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s0, 0)]); + t2 = + ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s1, 0)]); + t3 = + ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s2, 0)]); + + s0 = + (col_mul(t0, 3, 2, 0, 1) << 24) ^ + (col_mul(t0, 2, 1, 0, 3) << 16) ^ + (col_mul(t0, 1, 0, 2, 3) << 8) ^ + (col_mul(t0, 0, 3, 2, 1) ) ^ + rk[0]; + s1 = + (col_mul(t1, 3, 2, 0, 1) << 24) ^ + (col_mul(t1, 2, 1, 0, 3) << 16) ^ + (col_mul(t1, 1, 0, 2, 3) << 8) ^ + (col_mul(t1, 0, 3, 2, 1) ) ^ + rk[1]; + s2 = + (col_mul(t2, 3, 2, 0, 1) << 24) ^ + (col_mul(t2, 2, 1, 0, 3) << 16) ^ + 
(col_mul(t2, 1, 0, 2, 3) << 8) ^ + (col_mul(t2, 0, 3, 2, 1) ) ^ + rk[2]; + s3 = + (col_mul(t3, 3, 2, 0, 1) << 24) ^ + (col_mul(t3, 2, 1, 0, 3) << 16) ^ + (col_mul(t3, 1, 0, 2, 3) << 8) ^ + (col_mul(t3, 0, 3, 2, 1) ) ^ + rk[3]; + } + + t0 = + ((word32)Tsbox[GETBYTE(s0, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s1, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s2, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s3, 0)]); + t1 = + ((word32)Tsbox[GETBYTE(s1, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s2, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s3, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s0, 0)]); + t2 = + ((word32)Tsbox[GETBYTE(s2, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s3, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s0, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s1, 0)]); + t3 = + ((word32)Tsbox[GETBYTE(s3, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(s0, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(s1, 1)] << 8) ^ + ((word32)Tsbox[GETBYTE(s2, 0)]); + s0 = t0 ^ rk[0]; + s1 = t1 ^ rk[1]; + s2 = t2 ^ rk[2]; + s3 = t3 ^ rk[3]; +#endif /* write out */ - #ifdef LITTLE_ENDIAN_ORDER - s0 = ByteReverseWord32(s0); - s1 = ByteReverseWord32(s1); - s2 = ByteReverseWord32(s2); - s3 = ByteReverseWord32(s3); - #endif +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif XMEMCPY(outBlock, &s0, sizeof(s0)); XMEMCPY(outBlock + sizeof(s0), &s1, sizeof(s1)); XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2)); XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3)); + } +#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT || HAVE_AESGCM */ + +#if defined(HAVE_AES_DECRYPT) +#if (defined(HAVE_AES_CBC) && !defined(WOLFSSL_DEVCRYPTO_CBC)) || \ + defined(WOLFSSL_AES_DIRECT) + +#ifndef WOLFSSL_AES_SMALL_TABLES +/* load 4 Td Tables into cache by cache line stride */ +static WC_INLINE word32 PreFetchTd(void) +{ + word32 x = 0; + int i,j; + for (i = 0; i < 4; i++) { + /* 256 elements, each one is 4 bytes */ + for (j = 0; j < 256; j += WC_CACHE_LINE_SZ/4) { + x &= 
Td[i][j]; + } + } + return x; +} +#endif + +/* load Td Table4 into cache by cache line stride */ +static WC_INLINE word32 PreFetchTd4(void) +{ + word32 x = 0; + int i; + + for (i = 0; i < 256; i += WC_CACHE_LINE_SZ) { + x &= (word32)Td4[i]; + } + return x; +} + +/* Software AES - ECB Decrypt */ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) { word32 s0, s1, s2, s3; @@ -1251,7 +1929,7 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) const word32* rk = aes->key; if (r > 7 || r == 0) { WOLFSSL_MSG("AesDecrypt encountered improper key, set it up"); - return; /* stop instead of segfaulting, set up your keys! */ + return; /* stop instead of seg-faulting, set up your keys! */ } #ifdef WOLFSSL_AESNI if (haveAESNI && aes->use_aesni) { @@ -1265,7 +1943,8 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) #endif /* if input and output same will overwrite input iv */ - XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE); + if ((const byte*)aes->tmp != inBlock) + XMEMCPY(aes->tmp, inBlock, AES_BLOCK_SIZE); AES_ECB_decrypt(inBlock, outBlock, AES_BLOCK_SIZE, (byte*)aes->key, aes->rounds); return; @@ -1275,6 +1954,9 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) printf("Skipping AES-NI\n"); #endif } +#endif /* WOLFSSL_AESNI */ +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + return AES_ECB_decrypt(aes, inBlock, outBlock, AES_BLOCK_SIZE); #endif /* @@ -1286,18 +1968,21 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) XMEMCPY(&s2, inBlock + 2 * sizeof(s0), sizeof(s2)); XMEMCPY(&s3, inBlock + 3 * sizeof(s0), sizeof(s3)); - #ifdef LITTLE_ENDIAN_ORDER - s0 = ByteReverseWord32(s0); - s1 = ByteReverseWord32(s1); - s2 = ByteReverseWord32(s2); - s3 = ByteReverseWord32(s3); - #endif +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif s0 ^= rk[0]; s1 ^= 
rk[1]; s2 ^= rk[2]; s3 ^= rk[3]; +#ifndef WOLFSSL_AES_SMALL_TABLES + s0 |= PreFetchTd(); + /* * Nr - 1 full rounds: */ @@ -1362,70 +2047,170 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) * apply last round and * map cipher state to byte array block: */ + + t0 |= PreFetchTd4(); + s0 = - (Td[4][GETBYTE(t0, 3)] & 0xff000000) ^ - (Td[4][GETBYTE(t3, 2)] & 0x00ff0000) ^ - (Td[4][GETBYTE(t2, 1)] & 0x0000ff00) ^ - (Td[4][GETBYTE(t1, 0)] & 0x000000ff) ^ + ((word32)Td4[GETBYTE(t0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(t1, 0)]) ^ rk[0]; s1 = - (Td[4][GETBYTE(t1, 3)] & 0xff000000) ^ - (Td[4][GETBYTE(t0, 2)] & 0x00ff0000) ^ - (Td[4][GETBYTE(t3, 1)] & 0x0000ff00) ^ - (Td[4][GETBYTE(t2, 0)] & 0x000000ff) ^ + ((word32)Td4[GETBYTE(t1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t3, 1)] << 8) ^ + ((word32)Td4[GETBYTE(t2, 0)]) ^ rk[1]; s2 = - (Td[4][GETBYTE(t2, 3)] & 0xff000000) ^ - (Td[4][GETBYTE(t1, 2)] & 0x00ff0000) ^ - (Td[4][GETBYTE(t0, 1)] & 0x0000ff00) ^ - (Td[4][GETBYTE(t3, 0)] & 0x000000ff) ^ + ((word32)Td4[GETBYTE(t2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(t3, 0)]) ^ rk[2]; s3 = - (Td[4][GETBYTE(t3, 3)] & 0xff000000) ^ - (Td[4][GETBYTE(t2, 2)] & 0x00ff0000) ^ - (Td[4][GETBYTE(t1, 1)] & 0x0000ff00) ^ - (Td[4][GETBYTE(t0, 0)] & 0x000000ff) ^ + ((word32)Td4[GETBYTE(t3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(t2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(t1, 1)] << 8) ^ + ((word32)Td4[GETBYTE(t0, 0)]) ^ rk[3]; +#else + s0 |= PreFetchTd4(); + + r *= 2; + for (rk += 4; r > 1; r--, rk += 4) { + t0 = + ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s1, 0)]) ^ + rk[0]; + t1 = + ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s3, 1)] << 8) 
^ + ((word32)Td4[GETBYTE(s2, 0)]) ^ + rk[1]; + t2 = + ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s3, 0)]) ^ + rk[2]; + t3 = + ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s1, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s0, 0)]) ^ + rk[3]; + + s0 = + (inv_col_mul(t0, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t0, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t0, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t0, 1, 3, 2, 0) ); + s1 = + (inv_col_mul(t1, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t1, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t1, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t1, 1, 3, 2, 0) ); + s2 = + (inv_col_mul(t2, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t2, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t2, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t2, 1, 3, 2, 0) ); + s3 = + (inv_col_mul(t3, 0, 2, 1, 3) << 24) ^ + (inv_col_mul(t3, 3, 1, 0, 2) << 16) ^ + (inv_col_mul(t3, 2, 0, 3, 1) << 8) ^ + (inv_col_mul(t3, 1, 3, 2, 0) ); + } + + t0 = + ((word32)Td4[GETBYTE(s0, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s3, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s2, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s1, 0)]); + t1 = + ((word32)Td4[GETBYTE(s1, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s0, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s3, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s2, 0)]); + t2 = + ((word32)Td4[GETBYTE(s2, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s1, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s0, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s3, 0)]); + t3 = + ((word32)Td4[GETBYTE(s3, 3)] << 24) ^ + ((word32)Td4[GETBYTE(s2, 2)] << 16) ^ + ((word32)Td4[GETBYTE(s1, 1)] << 8) ^ + ((word32)Td4[GETBYTE(s0, 0)]); + s0 = t0 ^ rk[0]; + s1 = t1 ^ rk[1]; + s2 = t2 ^ rk[2]; + s3 = t3 ^ rk[3]; +#endif /* write out */ - #ifdef LITTLE_ENDIAN_ORDER - s0 = ByteReverseWord32(s0); - s1 = ByteReverseWord32(s1); - s2 = ByteReverseWord32(s2); - s3 = ByteReverseWord32(s3); - #endif +#ifdef LITTLE_ENDIAN_ORDER + s0 = ByteReverseWord32(s0); + s1 = 
ByteReverseWord32(s1); + s2 = ByteReverseWord32(s2); + s3 = ByteReverseWord32(s3); +#endif XMEMCPY(outBlock, &s0, sizeof(s0)); XMEMCPY(outBlock + sizeof(s0), &s1, sizeof(s1)); XMEMCPY(outBlock + 2 * sizeof(s0), &s2, sizeof(s2)); XMEMCPY(outBlock + 3 * sizeof(s0), &s3, sizeof(s3)); } +#endif /* HAVE_AES_CBC || WOLFSSL_AES_DIRECT */ +#endif /* HAVE_AES_DECRYPT */ #endif /* NEED_AES_TABLES */ + /* wc_AesSetKey */ -#ifdef STM32F2_CRYPTO - int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, - int dir) +#if defined(STM32_CRYPTO) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) { - word32 *rk = aes->key; + word32 *rk; - if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) + (void)dir; + + if (aes == NULL || (keylen != 16 && + #ifdef WOLFSSL_AES_192 + keylen != 24 && + #endif + keylen != 32)) { return BAD_FUNC_ARG; + } + rk = aes->key; + aes->keylen = keylen; aes->rounds = keylen/4 + 6; XMEMCPY(rk, userKey, keylen); + #if !defined(WOLFSSL_STM32_CUBEMX) || defined(STM32_HAL_V2) ByteReverseWords(rk, rk, keylen); + #endif + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif return wc_AesSetIV(aes, iv); } - - int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, - const byte* iv, int dir) - { - return wc_AesSetKey(aes, userKey, keylen, iv, dir); - } + #if defined(WOLFSSL_AES_DIRECT) + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } + #endif #elif defined(HAVE_COLDFIRE_SEC) #if defined (HAVE_THREADX) @@ -1447,30 +2232,30 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) extern volatile unsigned char __MBAR[]; - int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, - int dir) + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int 
dir) { if (AESBuffIn == NULL) { - #if defined (HAVE_THREADX) - int s1, s2, s3, s4, s5 ; - s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc, - sizeof(SECdescriptorType), TX_NO_WAIT); - s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn, - AES_BUFFER_SIZE, TX_NO_WAIT); - s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut, - AES_BUFFER_SIZE, TX_NO_WAIT); - s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey, - AES_BLOCK_SIZE*2, TX_NO_WAIT); - s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg, - AES_BLOCK_SIZE, TX_NO_WAIT); - - if(s1 || s2 || s3 || s4 || s5) - return BAD_FUNC_ARG; - #else - #warning "Allocate non-Cache buffers" - #endif + #if defined (HAVE_THREADX) + int s1, s2, s3, s4, s5; + s5 = tx_byte_allocate(&mp_ncached,(void *)&secDesc, + sizeof(SECdescriptorType), TX_NO_WAIT); + s1 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffIn, + AES_BUFFER_SIZE, TX_NO_WAIT); + s2 = tx_byte_allocate(&mp_ncached, (void *)&AESBuffOut, + AES_BUFFER_SIZE, TX_NO_WAIT); + s3 = tx_byte_allocate(&mp_ncached, (void *)&secKey, + AES_BLOCK_SIZE*2, TX_NO_WAIT); + s4 = tx_byte_allocate(&mp_ncached, (void *)&secReg, + AES_BLOCK_SIZE, TX_NO_WAIT); + + if (s1 || s2 || s3 || s4 || s5) + return BAD_FUNC_ARG; + #else + #warning "Allocate non-Cache buffers" + #endif - InitMutex(&Mutex_AesSEC); + wc_InitMutex(&Mutex_AesSEC); } if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) @@ -1479,29 +2264,165 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) if (aes == NULL) return BAD_FUNC_ARG; + aes->keylen = keylen; aes->rounds = keylen/4 + 6; XMEMCPY(aes->key, userKey, keylen); if (iv) XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + return 0; } -#elif defined(FREESCALE_MMCAU) +#elif defined(FREESCALE_LTC) int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { - byte *rk = (byte*)aes->key; + if (aes == NULL || 
!((keylen == 16) || (keylen == 24) || (keylen == 32))) + return BAD_FUNC_ARG; + + aes->rounds = keylen/4 + 6; + XMEMCPY(aes->key, userKey, keylen); + + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + return wc_AesSetIV(aes, iv); + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } +#elif defined(FREESCALE_MMCAU) + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + int ret; + byte* rk; + byte* tmpKey = (byte*)userKey; + int tmpKeyDynamic = 0; + word32 alignOffset = 0; + + (void)dir; if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) return BAD_FUNC_ARG; + if (aes == NULL) + return BAD_FUNC_ARG; + rk = (byte*)aes->key; if (rk == NULL) return BAD_FUNC_ARG; + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + aes->rounds = keylen/4 + 6; - cau_aes_set_key(userKey, keylen*8, rk); + #ifdef FREESCALE_MMCAU_CLASSIC + if ((wolfssl_word)userKey % WOLFSSL_MMCAU_ALIGNMENT) { + #ifndef NO_WOLFSSL_ALLOC_ALIGN + byte* tmp = (byte*)XMALLOC(keylen + WOLFSSL_MMCAU_ALIGNMENT, + aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + if (tmp == NULL) { + return MEMORY_E; + } + alignOffset = WOLFSSL_MMCAU_ALIGNMENT - + ((wolfssl_word)tmp % WOLFSSL_MMCAU_ALIGNMENT); + tmpKey = tmp + alignOffset; + XMEMCPY(tmpKey, userKey, keylen); + tmpKeyDynamic = 1; + #else + WOLFSSL_MSG("Bad cau_aes_set_key alignment"); + return BAD_ALIGN_E; + #endif + } + #endif + + ret = wolfSSL_CryptHwMutexLock(); + if(ret == 0) { + #ifdef FREESCALE_MMCAU_CLASSIC + cau_aes_set_key(tmpKey, keylen*8, rk); + #else + MMCAU_AES_SetKey(tmpKey, keylen, rk); + #endif + wolfSSL_CryptHwMutexUnLock(); + + ret = wc_AesSetIV(aes, iv); + } + + if (tmpKeyDynamic == 1) { + XFREE(tmpKey - alignOffset, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + } + 
+ return ret; + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } + +#elif defined(WOLFSSL_NRF51_AES) + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + int ret; + + (void)dir; + (void)iv; + + if (aes == NULL || keylen != 16) + return BAD_FUNC_ARG; + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + ret = nrf51_aes_set_key(userKey); + + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif + + return ret; + } + + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + (void)dir; + (void)iv; + + if (aes == NULL || (keylen != 16 && keylen != 24 && keylen != 32)) { + return BAD_FUNC_ARG; + } + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + + XMEMCPY(aes->key, userKey, keylen); + #if defined(WOLFSSL_AES_COUNTER) + aes->left = 0; + #endif return wc_AesSetIV(aes, iv); } @@ -1510,51 +2431,141 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) { return wc_AesSetKey(aes, userKey, keylen, iv, dir); } +#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, + int dir) + { + SaSiError_t ret = SASI_OK; + SaSiAesIv_t iv_aes; + + if (aes == NULL || + (keylen != AES_128_KEY_SIZE && + keylen != AES_192_KEY_SIZE && + keylen != AES_256_KEY_SIZE)) { + return BAD_FUNC_ARG; + } + #if defined(AES_MAX_KEY_SIZE) + if (keylen > (AES_MAX_KEY_SIZE/8)) { + return BAD_FUNC_ARG; + } + #endif + if (dir != AES_ENCRYPTION && + dir != AES_DECRYPTION) { + 
return BAD_FUNC_ARG; + } + + if (dir == AES_ENCRYPTION) { + aes->ctx.mode = SASI_AES_ENCRYPT; + SaSi_AesInit(&aes->ctx.user_ctx, + SASI_AES_ENCRYPT, + SASI_AES_MODE_CBC, + SASI_AES_PADDING_NONE); + } + else { + aes->ctx.mode = SASI_AES_DECRYPT; + SaSi_AesInit(&aes->ctx.user_ctx, + SASI_AES_DECRYPT, + SASI_AES_MODE_CBC, + SASI_AES_PADDING_NONE); + } + + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + XMEMCPY(aes->key, userKey, keylen); + + aes->ctx.key.pKey = (uint8_t*)aes->key; + aes->ctx.key.keySize= keylen; + + ret = SaSi_AesSetKey(&aes->ctx.user_ctx, + SASI_AES_USER_KEY, + &aes->ctx.key, + sizeof(aes->ctx.key)); + if (ret != SASI_OK) { + return BAD_FUNC_ARG; + } + + ret = wc_AesSetIV(aes, iv); + + if (iv) + XMEMCPY(iv_aes, iv, AES_BLOCK_SIZE); + else + XMEMSET(iv_aes, 0, AES_BLOCK_SIZE); + + + ret = SaSi_AesSetIv(&aes->ctx.user_ctx, iv_aes); + if (ret != SASI_OK) { + return ret; + } + return ret; + } + #if defined(WOLFSSL_AES_DIRECT) + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + return wc_AesSetKey(aes, userKey, keylen, iv, dir); + } + #endif + +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypto/devcrypto_aes.c */ + #else + + /* Software AES - SetKey */ static int wc_AesSetKeyLocal(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, int dir) { - word32 temp, *rk = aes->key; + word32 *rk = aes->key; + #ifdef NEED_AES_TABLES + word32 temp; unsigned int i = 0; + #endif #ifdef WOLFSSL_AESNI aes->use_aesni = 0; #endif /* WOLFSSL_AESNI */ - #ifdef WOLFSSL_AES_COUNTER + #if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_COUNTER) || \ + defined(WOLFSSL_AES_OFB) aes->left = 0; - #endif /* WOLFSSL_AES_COUNTER */ + #endif - aes->rounds = keylen/4 + 6; 
+ aes->keylen = keylen; + aes->rounds = (keylen/4) + 6; XMEMCPY(rk, userKey, keylen); - #ifdef LITTLE_ENDIAN_ORDER - ByteReverseWords(rk, rk, keylen); - #endif - - #ifdef WOLFSSL_PIC32MZ_CRYPT - { - word32 *akey1 = aes->key_ce; - word32 *areg = aes->iv_ce ; - aes->keylen = keylen ; - XMEMCPY(akey1, userKey, keylen); - if (iv) - XMEMCPY(areg, iv, AES_BLOCK_SIZE); - else - XMEMSET(areg, 0, AES_BLOCK_SIZE); - } - #endif + #if defined(LITTLE_ENDIAN_ORDER) && !defined(WOLFSSL_PIC32MZ_CRYPT) && \ + (!defined(WOLFSSL_ESP32WROOM32_CRYPT) || \ + defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES)) + ByteReverseWords(rk, rk, keylen); + #endif - switch(keylen) - { +#ifdef NEED_AES_TABLES + switch (keylen) { + #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 128 && \ + defined(WOLFSSL_AES_128) case 16: while (1) { temp = rk[3]; rk[4] = rk[0] ^ - (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^ - (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^ + #ifndef WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif rcon[i]; rk[5] = rk[1] ^ rk[4]; rk[6] = rk[2] ^ rk[5]; @@ -1564,17 +2575,27 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) rk += 4; } break; + #endif /* 128 */ + #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 192 && \ + defined(WOLFSSL_AES_192) case 24: /* for (;;) here triggers a bug in VC60 SP4 w/ Pro Pack */ while (1) { temp = rk[ 5]; rk[ 6] = rk[ 0] ^ - (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^ - (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^ + #ifndef 
WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif rcon[i]; rk[ 7] = rk[ 1] ^ rk[ 6]; rk[ 8] = rk[ 2] ^ rk[ 7]; @@ -1586,16 +2607,26 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) rk += 6; } break; + #endif /* 192 */ + #if defined(AES_MAX_KEY_SIZE) && AES_MAX_KEY_SIZE >= 256 && \ + defined(WOLFSSL_AES_256) case 32: while (1) { temp = rk[ 7]; rk[ 8] = rk[ 0] ^ - (Te[4][GETBYTE(temp, 2)] & 0xff000000) ^ - (Te[4][GETBYTE(temp, 1)] & 0x00ff0000) ^ - (Te[4][GETBYTE(temp, 0)] & 0x0000ff00) ^ - (Te[4][GETBYTE(temp, 3)] & 0x000000ff) ^ + #ifndef WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 2)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 1)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 0)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 3)] & 0x000000ff) ^ + #else + ((word32)Tsbox[GETBYTE(temp, 2)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 0)] << 8) ^ + ((word32)Tsbox[GETBYTE(temp, 3)]) ^ + #endif rcon[i]; rk[ 9] = rk[ 1] ^ rk[ 8]; rk[10] = rk[ 2] ^ rk[ 9]; @@ -1604,10 +2635,17 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) break; temp = rk[11]; rk[12] = rk[ 4] ^ - (Te[4][GETBYTE(temp, 3)] & 0xff000000) ^ - (Te[4][GETBYTE(temp, 2)] & 0x00ff0000) ^ - (Te[4][GETBYTE(temp, 1)] & 0x0000ff00) ^ - (Te[4][GETBYTE(temp, 0)] & 0x000000ff); + #ifndef WOLFSSL_AES_SMALL_TABLES + (Te[2][GETBYTE(temp, 3)] & 0xff000000) ^ + (Te[3][GETBYTE(temp, 2)] & 0x00ff0000) ^ + (Te[0][GETBYTE(temp, 1)] & 0x0000ff00) ^ + (Te[1][GETBYTE(temp, 0)] & 0x000000ff); + #else + ((word32)Tsbox[GETBYTE(temp, 3)] << 24) ^ + ((word32)Tsbox[GETBYTE(temp, 2)] << 16) ^ + ((word32)Tsbox[GETBYTE(temp, 1)] << 8) 
^ + ((word32)Tsbox[GETBYTE(temp, 0)]); + #endif rk[13] = rk[ 5] ^ rk[12]; rk[14] = rk[ 6] ^ rk[13]; rk[15] = rk[ 7] ^ rk[14]; @@ -1615,13 +2653,14 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) rk += 8; } break; + #endif /* 256 */ default: return BAD_FUNC_ARG; - } + } /* switch */ - if (dir == AES_DECRYPTION) - { + #if defined(HAVE_AES_DECRYPT) + if (dir == AES_DECRYPTION) { unsigned int j; rk = aes->key; @@ -1632,78 +2671,169 @@ static void wc_AesDecrypt(Aes* aes, const byte* inBlock, byte* outBlock) temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp; temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp; } + #if !defined(WOLFSSL_AES_SMALL_TABLES) /* apply the inverse MixColumn transform to all round keys but the first and the last: */ for (i = 1; i < aes->rounds; i++) { rk += 4; rk[0] = - Td[0][Te[4][GETBYTE(rk[0], 3)] & 0xff] ^ - Td[1][Te[4][GETBYTE(rk[0], 2)] & 0xff] ^ - Td[2][Te[4][GETBYTE(rk[0], 1)] & 0xff] ^ - Td[3][Te[4][GETBYTE(rk[0], 0)] & 0xff]; + Td[0][Te[1][GETBYTE(rk[0], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[0], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[0], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[0], 0)] & 0xff]; rk[1] = - Td[0][Te[4][GETBYTE(rk[1], 3)] & 0xff] ^ - Td[1][Te[4][GETBYTE(rk[1], 2)] & 0xff] ^ - Td[2][Te[4][GETBYTE(rk[1], 1)] & 0xff] ^ - Td[3][Te[4][GETBYTE(rk[1], 0)] & 0xff]; + Td[0][Te[1][GETBYTE(rk[1], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[1], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[1], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[1], 0)] & 0xff]; rk[2] = - Td[0][Te[4][GETBYTE(rk[2], 3)] & 0xff] ^ - Td[1][Te[4][GETBYTE(rk[2], 2)] & 0xff] ^ - Td[2][Te[4][GETBYTE(rk[2], 1)] & 0xff] ^ - Td[3][Te[4][GETBYTE(rk[2], 0)] & 0xff]; + Td[0][Te[1][GETBYTE(rk[2], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[2], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[2], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[2], 0)] & 0xff]; rk[3] = - Td[0][Te[4][GETBYTE(rk[3], 3)] & 0xff] ^ - Td[1][Te[4][GETBYTE(rk[3], 2)] & 0xff] ^ - 
Td[2][Te[4][GETBYTE(rk[3], 1)] & 0xff] ^ - Td[3][Te[4][GETBYTE(rk[3], 0)] & 0xff]; + Td[0][Te[1][GETBYTE(rk[3], 3)] & 0xff] ^ + Td[1][Te[1][GETBYTE(rk[3], 2)] & 0xff] ^ + Td[2][Te[1][GETBYTE(rk[3], 1)] & 0xff] ^ + Td[3][Te[1][GETBYTE(rk[3], 0)] & 0xff]; } + #endif + } + #else + (void)dir; + #endif /* HAVE_AES_DECRYPT */ + (void)temp; +#endif /* NEED_AES_TABLES */ + +#if defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + XMEMCPY((byte*)aes->key, userKey, keylen); + if (WOLFSSL_SCE_GSCE_HANDLE.p_cfg->endian_flag == CRYPTO_WORD_ENDIAN_BIG) { + ByteReverseWords(aes->key, aes->key, 32); } +#endif return wc_AesSetIV(aes, iv); } - int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, const byte* iv, - int dir) + int wc_AesSetKey(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) { + int ret; + #if defined(AES_MAX_KEY_SIZE) + const word32 max_key_len = (AES_MAX_KEY_SIZE / 8); + #endif - if (!((keylen == 16) || (keylen == 24) || (keylen == 32))) + #ifdef WOLFSSL_IMX6_CAAM_BLOB + byte local[32]; + word32 localSz = 32; + + if (keylen == (16 + WC_CAAM_BLOB_SZ) || + keylen == (24 + WC_CAAM_BLOB_SZ) || + keylen == (32 + WC_CAAM_BLOB_SZ)) { + if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) != 0) { + return BAD_FUNC_ARG; + } + + /* set local values */ + userKey = local; + keylen = localSz; + } + #endif + if (aes == NULL || + !((keylen == 16) || (keylen == 24) || (keylen == 32))) { return BAD_FUNC_ARG; + } - #ifdef HAVE_CAVIUM - if (aes->magic == WOLFSSL_AES_CAVIUM_MAGIC) - return wc_AesCaviumSetKey(aes, userKey, keylen, iv); + #if defined(AES_MAX_KEY_SIZE) + /* Check key length */ + if (keylen > max_key_len) { + return BAD_FUNC_ARG; + } + #endif + aes->keylen = keylen; + aes->rounds = keylen/4 + 6; + + #if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)) + #ifdef WOLF_CRYPTO_CB + if 
(aes->devId != INVALID_DEVID) #endif + { + XMEMCPY(aes->devKey, userKey, keylen); + } + #endif - #ifdef WOLFSSL_AESNI + #ifdef WOLFSSL_AESNI if (checkAESNI == 0) { haveAESNI = Check_CPU_support_AES(); checkAESNI = 1; } if (haveAESNI) { + #if defined(WOLFSSL_AES_COUNTER) || defined(WOLFSSL_AES_CFB) || \ + defined(WOLFSSL_AES_OFB) + aes->left = 0; + #endif /* WOLFSSL_AES_COUNTER */ aes->use_aesni = 1; if (iv) XMEMCPY(aes->reg, iv, AES_BLOCK_SIZE); + else + XMEMSET(aes->reg, 0, AES_BLOCK_SIZE); if (dir == AES_ENCRYPTION) return AES_set_encrypt_key(userKey, keylen * 8, aes); + #ifdef HAVE_AES_DECRYPT else return AES_set_decrypt_key(userKey, keylen * 8, aes); + #endif } - #endif /* WOLFSSL_AESNI */ + #endif /* WOLFSSL_AESNI */ + + ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); - return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); + #if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + aes->ctx.cfd = -1; + #endif + #ifdef WOLFSSL_IMX6_CAAM_BLOB + ForceZero(local, sizeof(local)); + #endif + return ret; } #if defined(WOLFSSL_AES_DIRECT) || defined(WOLFSSL_AES_COUNTER) + /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */ + int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, + const byte* iv, int dir) + { + int ret; - /* AES-CTR and AES-DIRECT need to use this for key setup, no aesni yet */ - int wc_AesSetKeyDirect(Aes* aes, const byte* userKey, word32 keylen, - const byte* iv, int dir) - { - return wc_AesSetKeyLocal(aes, userKey, keylen, iv, dir); - } + #ifdef WOLFSSL_IMX6_CAAM_BLOB + byte local[32]; + word32 localSz = 32; + + if (keylen == (16 + WC_CAAM_BLOB_SZ) || + keylen == (24 + WC_CAAM_BLOB_SZ) || + keylen == (32 + WC_CAAM_BLOB_SZ)) { + if (wc_caamOpenBlob((byte*)userKey, keylen, local, &localSz) + != 0) { + return BAD_FUNC_ARG; + } + /* set local values */ + userKey = local; + keylen = localSz; + } + #endif + ret = wc_AesSetKeyLocal(aes, userKey, keylen, iv, 
dir); + + #ifdef WOLFSSL_IMX6_CAAM_BLOB + ForceZero(local, sizeof(local)); + #endif + + return ret; + } #endif /* WOLFSSL_AES_DIRECT || WOLFSSL_AES_COUNTER */ -#endif /* STM32F2_CRYPTO, wc_AesSetKey block */ +#endif /* wc_AesSetKey block */ /* wc_AesSetIV is shared between software and hardware */ @@ -1720,160 +2850,265 @@ int wc_AesSetIV(Aes* aes, const byte* iv) return 0; } - -int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, - const byte* key, word32 keySz, const byte* iv) -{ - int ret = 0; -#ifdef WOLFSSL_SMALL_STACK - Aes* aes = NULL; -#else - Aes aes[1]; -#endif - -#ifdef WOLFSSL_SMALL_STACK - aes = (Aes*)XMALLOC(sizeof(Aes), NULL, DYNAMIC_TYPE_TMP_BUFFER); - if (aes == NULL) - return MEMORY_E; -#endif - - ret = wc_AesSetKey(aes, key, keySz, iv, AES_DECRYPTION); - if (ret == 0) - ret = wc_AesCbcDecrypt(aes, out, in, inSz); - -#ifdef WOLFSSL_SMALL_STACK - XFREE(aes, NULL, DYNAMIC_TYPE_TMP_BUFFER); -#endif - - return ret; -} - - /* AES-DIRECT */ #if defined(WOLFSSL_AES_DIRECT) - #if defined(FREESCALE_MMCAU) + #if defined(HAVE_COLDFIRE_SEC) + #error "Coldfire SEC doesn't yet support AES direct" + #elif defined(FREESCALE_LTC) /* Allow direct access to one block encrypt */ void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) { - byte* key; + byte *key; + uint32_t keySize; + key = (byte*)aes->key; + wc_AesGetKeySize(aes, &keySize); - return cau_aes_encrypt(in, key, aes->rounds, out); + LTC_AES_EncryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE, + key, keySize); } /* Allow direct access to one block decrypt */ void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) { - byte* key; + byte *key; + uint32_t keySize; + key = (byte*)aes->key; + wc_AesGetKeySize(aes, &keySize); - return cau_aes_decrypt(in, key, aes->rounds, out); + LTC_AES_DecryptEcb(LTC_BASE, in, out, AES_BLOCK_SIZE, + key, keySize, kLTC_EncryptKey); } - #elif defined(STM32F2_CRYPTO) - #error "STM32F2 crypto doesn't yet support AES direct" + #elif defined(WOLFSSL_IMX6_CAAM) && 
!defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ - #elif defined(HAVE_COLDFIRE_SEC) - #error "Coldfire SEC doesn't yet support AES direct" + #elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ - #elif defined(WOLFSSL_PIC32MZ_CRYPT) - #error "PIC32MZ doesn't yet support AES direct" + #elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + + #elif defined(STM32_CRYPTO) + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + if (wolfSSL_CryptHwMutexLock() == 0) { + wc_AesEncrypt(aes, in, out); + wolfSSL_CryptHwMutexUnLock(); + } + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + if (wolfSSL_CryptHwMutexLock() == 0) { + wc_AesDecrypt(aes, in, out); + wolfSSL_CryptHwMutexUnLock(); + } + } + #endif /* HAVE_AES_DECRYPT */ + #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + + /* Allow direct access to one block encrypt */ + void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesEncrypt(aes, in, out); + } + #ifdef HAVE_AES_DECRYPT + /* Allow direct access to one block decrypt */ + void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) + { + wc_AesDecrypt(aes, in, out); + } + #endif /* HAVE_AES_DECRYPT */ #else /* Allow direct access to one block encrypt */ void wc_AesEncryptDirect(Aes* aes, byte* out, const byte* in) { wc_AesEncrypt(aes, in, out); } - + #ifdef HAVE_AES_DECRYPT /* Allow direct access to one block decrypt */ void wc_AesDecryptDirect(Aes* aes, byte* out, const byte* in) { wc_AesDecrypt(aes, in, out); } - - #endif /* FREESCALE_MMCAU, AES direct block */ + #endif /* HAVE_AES_DECRYPT */ + #endif /* AES direct block */ #endif /* WOLFSSL_AES_DIRECT */ /* AES-CBC */ -#ifdef STM32F2_CRYPTO +#ifdef HAVE_AES_CBC +#if 
defined(STM32_CRYPTO) + +#ifdef WOLFSSL_STM32_CUBEMX int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - word32 *enc_key, *iv; - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_HandleTypeDef hcryp; - enc_key = aes->key; - iv = aes->reg; + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); - CRYP_IVStructInit(&AES_CRYP_IVInitStructure); + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } - /* reset registers to their default values */ - CRYP_DeInit(); + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CBC; + ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE); + #endif + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + HAL_CRYP_Init(&hcryp); - /* load key into correct registers */ - switch(aes->rounds) - { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; + while (blocks--) { + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCBC_Encrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = 
WC_TIMEOUT_E; break; + } - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; - break; + /* store iv for next call */ + XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; - break; + sz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } - default: + HAL_CRYP_DeInit(&hcryp); + + wolfSSL_CryptHwMutexUnLock(); + + return ret; + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_HandleTypeDef hcryp; + + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } + + /* if input and output same will overwrite input iv */ + XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_KEYDERIVATION_DECRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CBC; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CBC; + 
ByteReverseWords(aes->reg, aes->reg, AES_BLOCK_SIZE); + #endif + + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + HAL_CRYP_Init(&hcryp); + + while (blocks--) { + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCBC_Decrypt(&hcryp, (uint8_t*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; break; + } + + /* store iv for next call */ + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + } + + HAL_CRYP_DeInit(&hcryp); + wolfSSL_CryptHwMutexUnLock(); + + return ret; + } + #endif /* HAVE_AES_DECRYPT */ + +#else /* STD_PERI_LIB */ + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret; + word32 *iv; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; } - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); + + /* reset registers to their default values */ + CRYP_DeInit(); + + /* set key */ + CRYP_KeyInit(&keyInit); /* set iv */ + iv = aes->reg; + CRYP_IVStructInit(&ivInit); ByteReverseWords(iv, iv, AES_BLOCK_SIZE); - AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0]; - AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1]; - AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2]; - AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3]; - CRYP_IVInit(&AES_CRYP_IVInitStructure); - - /* set direction, mode, and datatype */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - 
CRYP_Init(&AES_CRYP_InitStructure); + ivInit.CRYP_IV0Left = iv[0]; + ivInit.CRYP_IV0Right = iv[1]; + ivInit.CRYP_IV1Left = iv[2]; + ivInit.CRYP_IV1Right = iv[3]; + CRYP_IVInit(&ivInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); - while (sz > 0) - { + while (blocks--) { /* flush IN/OUT FIFOs */ CRYP_FIFOFlush(); @@ -1883,7 +3118,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, CRYP_DataIn(*(uint32_t*)&in[12]); /* wait until the complete message has been processed */ - while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} *(uint32_t*)&out[0] = CRYP_DataOut(); *(uint32_t*)&out[4] = CRYP_DataOut(); @@ -1893,31 +3128,36 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, /* store iv for next call */ XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - sz -= 16; - in += 16; - out += 16; + sz -= AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; } /* disable crypto processor */ CRYP_Cmd(DISABLE); + wolfSSL_CryptHwMutexUnLock(); - return 0; + return ret; } + #ifdef HAVE_AES_DECRYPT int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - word32 *dec_key, *iv; - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; - - dec_key = aes->key; - iv = aes->reg; - - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); - CRYP_IVStructInit(&AES_CRYP_IVInitStructure); + int ret; + word32 *iv; + word32 blocks = (sz / AES_BLOCK_SIZE); + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) + return ret; + + ret = 
wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } /* if input and output same will overwrite input iv */ XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); @@ -1925,76 +3165,37 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, /* reset registers to their default values */ CRYP_DeInit(); - /* load key into correct registers */ - switch(aes->rounds) - { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[3]; - break; - - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[5]; - break; - - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - AES_CRYP_KeyInitStructure.CRYP_Key0Left = dec_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = dec_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = dec_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = dec_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = dec_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = dec_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = dec_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = dec_key[7]; - break; - - default: - break; - } - - /* set direction, mode, and datatype for key preparation */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_32b; - 
CRYP_Init(&AES_CRYP_InitStructure); - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); + /* set direction and key */ + CRYP_KeyInit(&keyInit); + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_Key; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); /* wait until key has been prepared */ - while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} - /* set direction, mode, and datatype for decryption */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Decrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CBC; + CRYP_Init(&cryptInit); /* set iv */ + iv = aes->reg; + CRYP_IVStructInit(&ivInit); ByteReverseWords(iv, iv, AES_BLOCK_SIZE); - - AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0]; - AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1]; - AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2]; - AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3]; - CRYP_IVInit(&AES_CRYP_IVInitStructure); + ivInit.CRYP_IV0Left = iv[0]; + ivInit.CRYP_IV0Right = iv[1]; + ivInit.CRYP_IV1Left = iv[2]; + ivInit.CRYP_IV1Right = iv[3]; + CRYP_IVInit(&ivInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); - while (sz > 0) - { + while (blocks--) { /* flush IN/OUT FIFOs */ CRYP_FIFOFlush(); @@ -2004,7 +3205,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, CRYP_DataIn(*(uint32_t*)&in[12]); /* wait until the complete message has been processed */ - while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} *(uint32_t*)&out[0] = CRYP_DataOut(); *(uint32_t*)&out[4] = CRYP_DataOut(); @@ -2014,24 +3215,26 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, /* store iv 
for next call */ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); - sz -= 16; - in += 16; - out += 16; + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; } /* disable crypto processor */ CRYP_Cmd(DISABLE); + wolfSSL_CryptHwMutexUnLock(); - return 0; + return ret; } + #endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_STM32_CUBEMX */ #elif defined(HAVE_COLDFIRE_SEC) static int wc_AesCbcCrypt(Aes* aes, byte* po, const byte* pi, word32 sz, - word32 descHeader) + word32 descHeader) { #ifdef DEBUG_WOLFSSL int i; int stat1, stat2; int ret; - #endif + #endif int size; volatile int v; @@ -2039,7 +3242,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, if ((pi == NULL) || (po == NULL)) return BAD_FUNC_ARG; /*wrong pointer*/ - LockMutex(&Mutex_AesSEC); + wc_LockMutex(&Mutex_AesSEC); /* Set descriptor for SEC */ secDesc->length1 = 0x0; @@ -2049,9 +3252,9 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, secDesc->pointer2 = (byte *)secReg; /* Initial Vector */ switch(aes->rounds) { - case 10: secDesc->length3 = 16 ; break ; - case 12: secDesc->length3 = 24 ; break ; - case 14: secDesc->length3 = 32 ; break ; + case 10: secDesc->length3 = 16; break; + case 12: secDesc->length3 = 24; break; + case 14: secDesc->length3 = 32; break; } XMEMCPY(secKey, aes->key, secDesc->length3); @@ -2115,7 +3318,7 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, po += size; } - UnLockMutex(&Mutex_AesSEC); + wc_UnLockMutex(&Mutex_AesSEC); return 0; } @@ -2124,40 +3327,92 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_ENCRYPT)); } + #ifdef HAVE_AES_DECRYPT int wc_AesCbcDecrypt(Aes* aes, byte* po, const byte* pi, word32 sz) { return (wc_AesCbcCrypt(aes, po, pi, sz, SEC_DESC_AES_CBC_DECRYPT)); } + #endif /* HAVE_AES_DECRYPT */ -#elif defined(FREESCALE_MMCAU) +#elif defined(FREESCALE_LTC) int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - int i; 
- int offset = 0; - int len = sz; - + uint32_t keySize; + status_t status; byte *iv, *enc_key; - byte temp_block[AES_BLOCK_SIZE]; + word32 blocks = (sz / AES_BLOCK_SIZE); iv = (byte*)aes->reg; enc_key = (byte*)aes->key; - if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) { - WOLFSSL_MSG("Bad cau_aes_encrypt alignment"); - return BAD_ALIGN_E; + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; } - while (len > 0) - { + status = LTC_AES_EncryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, + iv, enc_key, keySize); + + /* store iv for next call */ + if (status == kStatus_Success) { + XMEMCPY(iv, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + + return (status == kStatus_Success) ? 0 : -1; + } + + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + uint32_t keySize; + status_t status; + byte* iv, *dec_key; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte temp_block[AES_BLOCK_SIZE]; + + iv = (byte*)aes->reg; + dec_key = (byte*)aes->key; + + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; + } + + /* get IV for next call */ + XMEMCPY(temp_block, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + status = LTC_AES_DecryptCbc(LTC_BASE, in, out, blocks * AES_BLOCK_SIZE, + iv, dec_key, keySize, kLTC_EncryptKey); + + /* store IV for next call */ + if (status == kStatus_Success) { + XMEMCPY(iv, temp_block, AES_BLOCK_SIZE); + } + + return (status == kStatus_Success) ? 
0 : -1; + } + #endif /* HAVE_AES_DECRYPT */ + +#elif defined(FREESCALE_MMCAU) + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int i; + int offset = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte *iv; + byte temp_block[AES_BLOCK_SIZE]; + + iv = (byte*)aes->reg; + + while (blocks--) { XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE); /* XOR block with IV for CBC */ for (i = 0; i < AES_BLOCK_SIZE; i++) temp_block[i] ^= iv[i]; - cau_aes_encrypt(temp_block, enc_key, aes->rounds, out + offset); + wc_AesEncrypt(aes, temp_block, out + offset); - len -= AES_BLOCK_SIZE; offset += AES_BLOCK_SIZE; /* store IV for next block */ @@ -2166,29 +3421,21 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, return 0; } - + #ifdef HAVE_AES_DECRYPT int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { int i; int offset = 0; - int len = sz; - - byte* iv, *dec_key; + word32 blocks = (sz / AES_BLOCK_SIZE); + byte* iv; byte temp_block[AES_BLOCK_SIZE]; iv = (byte*)aes->reg; - dec_key = (byte*)aes->key; - if ((wolfssl_word)out % WOLFSSL_MMCAU_ALIGNMENT) { - WOLFSSL_MSG("Bad cau_aes_decrypt alignment"); - return BAD_ALIGN_E; - } - - while (len > 0) - { + while (blocks--) { XMEMCPY(temp_block, in + offset, AES_BLOCK_SIZE); - cau_aes_decrypt(in + offset, dec_key, aes->rounds, out + offset); + wc_AesDecrypt(aes, in + offset, out + offset); /* XOR block with IV for CBC */ for (i = 0; i < AES_BLOCK_SIZE; i++) @@ -2197,132 +3444,131 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, /* store IV for next block */ XMEMCPY(iv, temp_block, AES_BLOCK_SIZE); - len -= AES_BLOCK_SIZE; offset += AES_BLOCK_SIZE; } return 0; } + #endif /* HAVE_AES_DECRYPT */ #elif defined(WOLFSSL_PIC32MZ_CRYPT) - /* core hardware crypt engine driver */ - static void wc_AesCrypt(Aes *aes, byte* out, const byte* in, word32 sz, - int dir, int algo, int cryptoalgo) + + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - 
securityAssociation *sa_p ; - bufferDescriptor *bd_p ; - - volatile securityAssociation sa __attribute__((aligned (8))); - volatile bufferDescriptor bd __attribute__((aligned (8))); - volatile int k ; - - /* get uncached address */ - sa_p = KVA0_TO_KVA1(&sa) ; - bd_p = KVA0_TO_KVA1(&bd) ; - - /* Sync cache and physical memory */ - if(PIC32MZ_IF_RAM(in)) { - XMEMCPY((void *)KVA0_TO_KVA1(in), (void *)in, sz); - } - XMEMSET((void *)KVA0_TO_KVA1(out), 0, sz); - /* Set up the Security Association */ - XMEMSET((byte *)KVA0_TO_KVA1(&sa), 0, sizeof(sa)); - sa_p->SA_CTRL.ALGO = algo ; /* AES */ - sa_p->SA_CTRL.LNC = 1; - sa_p->SA_CTRL.LOADIV = 1; - sa_p->SA_CTRL.FB = 1; - sa_p->SA_CTRL.ENCTYPE = dir ; /* Encryption/Decryption */ - sa_p->SA_CTRL.CRYPTOALGO = cryptoalgo; - - if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM){ - switch(aes->keylen) { - case 32: - sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_256 ; - break ; - case 24: - sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_192 ; - break ; - case 16: - sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ; - break ; - } - } else - sa_p->SA_CTRL.KEYSIZE = PIC32_AES_KEYSIZE_128 ; - - ByteReverseWords( - (word32 *)KVA0_TO_KVA1(sa.SA_ENCKEY + 8 - aes->keylen/sizeof(word32)), - (word32 *)aes->key_ce, aes->keylen); - ByteReverseWords( - (word32*)KVA0_TO_KVA1(sa.SA_ENCIV), (word32 *)aes->iv_ce, 16); - - XMEMSET((byte *)KVA0_TO_KVA1(&bd), 0, sizeof(bd)); - /* Set up the Buffer Descriptor */ - bd_p->BD_CTRL.BUFLEN = sz; - if(cryptoalgo == PIC32_CRYPTOALGO_AES_GCM) { - if(sz % 0x10) - bd_p->BD_CTRL.BUFLEN = (sz/0x10 + 1) * 0x10 ; - } - bd_p->BD_CTRL.LIFM = 1; - bd_p->BD_CTRL.SA_FETCH_EN = 1; - bd_p->BD_CTRL.LAST_BD = 1; - bd_p->BD_CTRL.DESC_EN = 1; - - bd_p->SA_ADDR = (unsigned int)KVA_TO_PA(&sa) ; - bd_p->SRCADDR = (unsigned int)KVA_TO_PA(in) ; - bd_p->DSTADDR = (unsigned int)KVA_TO_PA(out); - bd_p->MSGLEN = sz ; - - CECON = 1 << 6; - while (CECON); - - /* Run the engine */ - CEBDPADDR = (unsigned int)KVA_TO_PA(&bd) ; - CEINTEN = 0x07; - CECON = 
0x27; - - WAIT_ENGINE ; - - if((cryptoalgo == PIC32_CRYPTOALGO_CBC) || - (cryptoalgo == PIC32_CRYPTOALGO_TCBC)|| - (cryptoalgo == PIC32_CRYPTOALGO_RCBC)) { - /* set iv for the next call */ - if(dir == PIC32_ENCRYPTION) { - XMEMCPY((void *)aes->iv_ce, - (void*)KVA0_TO_KVA1(out + sz - AES_BLOCK_SIZE), - AES_BLOCK_SIZE) ; - } else { - ByteReverseWords((word32*)aes->iv_ce, - (word32 *)KVA0_TO_KVA1(in + sz - AES_BLOCK_SIZE), - AES_BLOCK_SIZE); - } + int ret; + + /* hardware fails on input that is not a multiple of AES block size */ + if (sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + + ret = wc_Pic32AesCrypt( + aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, + out, in, sz, PIC32_ENCRYPTION, + PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC); + + /* store iv for next call */ + if (ret == 0) { + XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + } + + return ret; + } + #ifdef HAVE_AES_DECRYPT + int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + int ret; + byte scratch[AES_BLOCK_SIZE]; + + /* hardware fails on input that is not a multiple of AES block size */ + if (sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } + XMEMCPY(scratch, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + + ret = wc_Pic32AesCrypt( + aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, + out, in, sz, PIC32_DECRYPTION, + PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCBC); + + /* store iv for next call */ + if (ret == 0) { + XMEMCPY((byte*)aes->reg, scratch, AES_BLOCK_SIZE); } - XMEMCPY((byte *)out, (byte *)KVA0_TO_KVA1(out), sz) ; - ByteReverseWords((word32*)out, (word32 *)out, sz); + + return ret; } + #endif /* HAVE_AES_DECRYPT */ +#elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - wc_AesCrypt(aes, out, in, sz, PIC32_ENCRYPTION, PIC32_ALGO_AES, - PIC32_CRYPTOALGO_RCBC ); - return 0 ; + return wc_esp32AesCbcEncrypt(aes, out, in, sz); + } + int 
wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return wc_esp32AesCbcDecrypt(aes, out, in, sz); + } +#elif defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out); } - int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - wc_AesCrypt(aes, out, in, sz, PIC32_DECRYPTION, PIC32_ALGO_AES, - PIC32_CRYPTOALGO_RCBC); - return 0 ; + return SaSi_AesBlock(&aes->ctx.user_ctx, (uint8_t* )in, sz, out); } +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_CBC) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ #else + + /* Software AES - CBC Encrypt */ int wc_AesCbcEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - word32 blocks = sz / AES_BLOCK_SIZE; + word32 blocks = (sz / AES_BLOCK_SIZE); + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } - #ifdef HAVE_CAVIUM - if (aes->magic == WOLFSSL_AES_CAVIUM_MAGIC) - return wc_AesCaviumCbcEncrypt(aes, out, in, sz); + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesCbcEncrypt(aes, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } #endif + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + /* if async and byte count above threshold */ + if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && + sz >= WC_ASYNC_THRESH_AES_CBC) { + #if defined(HAVE_CAVIUM) + return NitroxAesCbcEncrypt(aes, out, in, sz); + #elif defined(HAVE_INTEL_QA) + return IntelQaSymAesCbcEncrypt(&aes->asyncDev, out, in, sz, + (const byte*)aes->devKey, aes->keylen, + (byte*)aes->reg, AES_BLOCK_SIZE); + #else /* 
WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_ENCRYPT)) { + WC_ASYNC_TEST* testDev = &aes->asyncDev.test; + testDev->aes.aes = aes; + testDev->aes.out = out; + testDev->aes.in = in; + testDev->aes.sz = sz; + return WC_PENDING_E; + } + #endif + } + #endif /* WOLFSSL_ASYNC_CRYPT */ #ifdef WOLFSSL_AESNI if (haveAESNI) { @@ -2337,22 +3583,25 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, #endif /* check alignment, decrypt doesn't need alignment */ - if ((wolfssl_word)in % 16) { + if ((wolfssl_word)in % AESNI_ALIGN) { #ifndef NO_WOLFSSL_ALLOC_ALIGN - byte* tmp = (byte*)XMALLOC(sz, NULL, DYNAMIC_TYPE_TMP_BUFFER); - WOLFSSL_MSG("AES-CBC encrypt with bad alignment"); + byte* tmp = (byte*)XMALLOC(sz + AES_BLOCK_SIZE + AESNI_ALIGN, + aes->heap, DYNAMIC_TYPE_TMP_BUFFER); + byte* tmp_align; if (tmp == NULL) return MEMORY_E; - XMEMCPY(tmp, in, sz); - AES_CBC_encrypt(tmp, tmp, (byte*)aes->reg, sz, (byte*)aes->key, - aes->rounds); + tmp_align = tmp + (AESNI_ALIGN - ((size_t)tmp % AESNI_ALIGN)); + XMEMCPY(tmp_align, in, sz); + AES_CBC_encrypt(tmp_align, tmp_align, (byte*)aes->reg, sz, + (byte*)aes->key, aes->rounds); /* store iv for next call */ - XMEMCPY(aes->reg, tmp + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + XMEMCPY(aes->reg, tmp_align + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - XMEMCPY(out, tmp, sz); - XFREE(tmp, NULL, DYNAMIC_TYPE_TMP_BUFFER); + XMEMCPY(out, tmp_align, sz); + XFREE(tmp, aes->heap, DYNAMIC_TYPE_TMP_BUFFER); return 0; #else + WOLFSSL_MSG("AES-CBC encrypt with bad alignment"); return BAD_ALIGN_E; #endif } @@ -2378,13 +3627,46 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, return 0; } + #ifdef HAVE_AES_DECRYPT + /* Software AES - CBC Decrypt */ int wc_AesCbcDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - word32 blocks = sz / AES_BLOCK_SIZE; + word32 blocks; + + if (aes == NULL || out == NULL || in == NULL + || sz % AES_BLOCK_SIZE != 0) { + return BAD_FUNC_ARG; + } - 
#ifdef HAVE_CAVIUM - if (aes->magic == WOLFSSL_AES_CAVIUM_MAGIC) - return wc_AesCaviumCbcDecrypt(aes, out, in, sz); + #ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesCbcDecrypt(aes, out, in, sz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } + #endif + #if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + /* if async and byte count above threshold */ + if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && + sz >= WC_ASYNC_THRESH_AES_CBC) { + #if defined(HAVE_CAVIUM) + return NitroxAesCbcDecrypt(aes, out, in, sz); + #elif defined(HAVE_INTEL_QA) + return IntelQaSymAesCbcDecrypt(&aes->asyncDev, out, in, sz, + (const byte*)aes->devKey, aes->keylen, + (byte*)aes->reg, AES_BLOCK_SIZE); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_CBC_DECRYPT)) { + WC_ASYNC_TEST* testDev = &aes->asyncDev.test; + testDev->aes.aes = aes; + testDev->aes.out = out; + testDev->aes.in = in; + testDev->aes.sz = sz; + return WC_PENDING_E; + } + #endif + } #endif #ifdef WOLFSSL_AESNI @@ -2401,18 +3683,28 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, /* if input and output same will overwrite input iv */ XMEMCPY(aes->tmp, in + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - AES_CBC_decrypt(in, out, (byte*)aes->reg, sz, (byte*)aes->key, + #if defined(WOLFSSL_AESNI_BY4) + AES_CBC_decrypt_by4(in, out, (byte*)aes->reg, sz, (byte*)aes->key, + aes->rounds); + #elif defined(WOLFSSL_AESNI_BY6) + AES_CBC_decrypt_by6(in, out, (byte*)aes->reg, sz, (byte*)aes->key, aes->rounds); + #else /* WOLFSSL_AESNI_BYx */ + AES_CBC_decrypt_by8(in, out, (byte*)aes->reg, sz, (byte*)aes->key, + aes->rounds); + #endif /* WOLFSSL_AESNI_BYx */ /* store iv for next call */ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); return 0; } #endif + blocks = sz / AES_BLOCK_SIZE; while (blocks--) { XMEMCPY(aes->tmp, in, AES_BLOCK_SIZE); wc_AesDecrypt(aes, (byte*)aes->tmp, out); 
xorbuf(out, (byte*)aes->reg, AES_BLOCK_SIZE); + /* store iv for next call */ XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); out += AES_BLOCK_SIZE; @@ -2421,198 +3713,226 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, return 0; } + #endif -#endif /* STM32F2_CRYPTO, AES-CBC block */ +#endif /* AES-CBC block */ +#endif /* HAVE_AES_CBC */ /* AES-CTR */ -#ifdef WOLFSSL_AES_COUNTER +#if defined(WOLFSSL_AES_COUNTER) - #ifdef STM32F2_CRYPTO - void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) - { - word32 *enc_key, *iv; - CRYP_InitTypeDef AES_CRYP_InitStructure; - CRYP_KeyInitTypeDef AES_CRYP_KeyInitStructure; - CRYP_IVInitTypeDef AES_CRYP_IVInitStructure; + #ifdef STM32_CRYPTO + #define NEED_AES_CTR_SOFT + #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock - enc_key = aes->key; - iv = aes->reg; + int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) + { + int ret = 0; + #ifdef WOLFSSL_STM32_CUBEMX + CRYP_HandleTypeDef hcryp; + #ifdef STM32_HAL_V2 + word32 iv[AES_BLOCK_SIZE/sizeof(word32)]; + #endif + #else + word32 *iv; + CRYP_InitTypeDef cryptInit; + CRYP_KeyInitTypeDef keyInit; + CRYP_IVInitTypeDef ivInit; + #endif - /* crypto structure initialization */ - CRYP_KeyStructInit(&AES_CRYP_KeyInitStructure); - CRYP_StructInit(&AES_CRYP_InitStructure); - CRYP_IVStructInit(&AES_CRYP_IVInitStructure); + ret = wolfSSL_CryptHwMutexLock(); + if (ret != 0) { + return ret; + } - /* reset registers to their default values */ - CRYP_DeInit(); + #ifdef WOLFSSL_STM32_CUBEMX + ret = wc_Stm32_Aes_Init(aes, &hcryp); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; + } - /* load key into correct registers */ - switch(aes->rounds) - { - case 10: /* 128-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_128b; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[2]; - 
AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[3]; - break; + #ifdef STM32_CRYPTO_AES_ONLY + hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT; + hcryp.Init.ChainingMode = CRYP_CHAINMODE_AES_CTR; + hcryp.Init.KeyWriteFlag = CRYP_KEY_WRITE_ENABLE; + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + #elif defined(STM32_HAL_V2) + hcryp.Init.Algorithm = CRYP_AES_CTR; + ByteReverseWords(iv, aes->reg, AES_BLOCK_SIZE); + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)iv; + #else + hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)aes->reg; + #endif + HAL_CRYP_Init(&hcryp); + + #ifdef STM32_CRYPTO_AES_ONLY + ret = HAL_CRYPEx_AES(&hcryp, (byte*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #elif defined(STM32_HAL_V2) + ret = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in, AES_BLOCK_SIZE, + (uint32_t*)out, STM32_HAL_TIMEOUT); + #else + ret = HAL_CRYP_AESCTR_Encrypt(&hcryp, (byte*)in, AES_BLOCK_SIZE, + out, STM32_HAL_TIMEOUT); + #endif + if (ret != HAL_OK) { + ret = WC_TIMEOUT_E; + } + HAL_CRYP_DeInit(&hcryp); - case 12: /* 192-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_192b; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[5]; - break; + #else /* STD_PERI_LIB */ + ret = wc_Stm32_Aes_Init(aes, &cryptInit, &keyInit); + if (ret != 0) { + wolfSSL_CryptHwMutexUnLock(); + return ret; + } - case 14: /* 256-bit key */ - AES_CRYP_InitStructure.CRYP_KeySize = CRYP_KeySize_256b; - AES_CRYP_KeyInitStructure.CRYP_Key0Left = enc_key[0]; - AES_CRYP_KeyInitStructure.CRYP_Key0Right = enc_key[1]; - AES_CRYP_KeyInitStructure.CRYP_Key1Left = enc_key[2]; - AES_CRYP_KeyInitStructure.CRYP_Key1Right = enc_key[3]; - AES_CRYP_KeyInitStructure.CRYP_Key2Left = enc_key[4]; - AES_CRYP_KeyInitStructure.CRYP_Key2Right = 
enc_key[5]; - AES_CRYP_KeyInitStructure.CRYP_Key3Left = enc_key[6]; - AES_CRYP_KeyInitStructure.CRYP_Key3Right = enc_key[7]; - break; + /* reset registers to their default values */ + CRYP_DeInit(); - default: - break; - } - CRYP_KeyInit(&AES_CRYP_KeyInitStructure); + /* set key */ + CRYP_KeyInit(&keyInit); /* set iv */ - ByteReverseWords(iv, iv, AES_BLOCK_SIZE); - AES_CRYP_IVInitStructure.CRYP_IV0Left = iv[0]; - AES_CRYP_IVInitStructure.CRYP_IV0Right = iv[1]; - AES_CRYP_IVInitStructure.CRYP_IV1Left = iv[2]; - AES_CRYP_IVInitStructure.CRYP_IV1Right = iv[3]; - CRYP_IVInit(&AES_CRYP_IVInitStructure); - - /* set direction, mode, and datatype */ - AES_CRYP_InitStructure.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; - AES_CRYP_InitStructure.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR; - AES_CRYP_InitStructure.CRYP_DataType = CRYP_DataType_8b; - CRYP_Init(&AES_CRYP_InitStructure); + iv = aes->reg; + CRYP_IVStructInit(&ivInit); + ivInit.CRYP_IV0Left = ByteReverseWord32(iv[0]); + ivInit.CRYP_IV0Right = ByteReverseWord32(iv[1]); + ivInit.CRYP_IV1Left = ByteReverseWord32(iv[2]); + ivInit.CRYP_IV1Right = ByteReverseWord32(iv[3]); + CRYP_IVInit(&ivInit); + + /* set direction and mode */ + cryptInit.CRYP_AlgoDir = CRYP_AlgoDir_Encrypt; + cryptInit.CRYP_AlgoMode = CRYP_AlgoMode_AES_CTR; + CRYP_Init(&cryptInit); /* enable crypto processor */ CRYP_Cmd(ENABLE); - while (sz > 0) - { - /* flush IN/OUT FIFOs */ - CRYP_FIFOFlush(); - - CRYP_DataIn(*(uint32_t*)&in[0]); - CRYP_DataIn(*(uint32_t*)&in[4]); - CRYP_DataIn(*(uint32_t*)&in[8]); - CRYP_DataIn(*(uint32_t*)&in[12]); - - /* wait until the complete message has been processed */ - while(CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} + /* flush IN/OUT FIFOs */ + CRYP_FIFOFlush(); - *(uint32_t*)&out[0] = CRYP_DataOut(); - *(uint32_t*)&out[4] = CRYP_DataOut(); - *(uint32_t*)&out[8] = CRYP_DataOut(); - *(uint32_t*)&out[12] = CRYP_DataOut(); + CRYP_DataIn(*(uint32_t*)&in[0]); + CRYP_DataIn(*(uint32_t*)&in[4]); + CRYP_DataIn(*(uint32_t*)&in[8]); + 
CRYP_DataIn(*(uint32_t*)&in[12]); - /* store iv for next call */ - XMEMCPY(aes->reg, out + sz - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + /* wait until the complete message has been processed */ + while (CRYP_GetFlagStatus(CRYP_FLAG_BUSY) != RESET) {} - sz -= 16; - in += 16; - out += 16; - } + *(uint32_t*)&out[0] = CRYP_DataOut(); + *(uint32_t*)&out[4] = CRYP_DataOut(); + *(uint32_t*)&out[8] = CRYP_DataOut(); + *(uint32_t*)&out[12] = CRYP_DataOut(); /* disable crypto processor */ CRYP_Cmd(DISABLE); + + #endif /* WOLFSSL_STM32_CUBEMX */ + + wolfSSL_CryptHwMutexUnLock(); + return ret; } + #elif defined(WOLFSSL_PIC32MZ_CRYPT) - void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + + #define NEED_AES_CTR_SOFT + #define XTRANSFORM_AESCTRBLOCK wc_AesCtrEncryptBlock + + int wc_AesCtrEncryptBlock(Aes* aes, byte* out, const byte* in) { - int i ; - char out_block[AES_BLOCK_SIZE] ; - int odd ; - int even ; - char *tmp ; /* (char *)aes->tmp, for short */ - - tmp = (char *)aes->tmp ; - if(aes->left) { - if((aes->left + sz) >= AES_BLOCK_SIZE){ - odd = AES_BLOCK_SIZE - aes->left ; - } else { - odd = sz ; - } - XMEMCPY(tmp+aes->left, in, odd) ; - if((odd+aes->left) == AES_BLOCK_SIZE){ - wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE, - PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR); - XMEMCPY(out, out_block+aes->left, odd) ; - aes->left = 0 ; - XMEMSET(tmp, 0x0, AES_BLOCK_SIZE) ; - /* Increment IV */ - for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { - if (++((byte *)aes->iv_ce)[i]) - break ; - } - } - in += odd ; - out+= odd ; - sz -= odd ; - } - odd = sz % AES_BLOCK_SIZE ; /* if there is tail flagment */ - if(sz / AES_BLOCK_SIZE) { - even = (sz/AES_BLOCK_SIZE)*AES_BLOCK_SIZE ; - wc_AesCrypt(aes, out, in, even, PIC32_ENCRYPTION, PIC32_ALGO_AES, - PIC32_CRYPTOALGO_RCTR); - out += even ; - in += even ; - do { /* Increment IV */ - for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { - if (++((byte *)aes->iv_ce)[i]) - break ; - } - even -= AES_BLOCK_SIZE ; - } while((int)even 
> 0) ; - } - if(odd) { - XMEMSET(tmp+aes->left, 0x0, AES_BLOCK_SIZE - aes->left) ; - XMEMCPY(tmp+aes->left, in, odd) ; - wc_AesCrypt(aes, out_block, tmp, AES_BLOCK_SIZE, - PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR); - XMEMCPY(out, out_block+aes->left,odd) ; - aes->left += odd ; - } + word32 tmpIv[AES_BLOCK_SIZE / sizeof(word32)]; + XMEMCPY(tmpIv, aes->reg, AES_BLOCK_SIZE); + return wc_Pic32AesCrypt( + aes->key, aes->keylen, tmpIv, AES_BLOCK_SIZE, + out, in, AES_BLOCK_SIZE, + PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_RCTR); } #elif defined(HAVE_COLDFIRE_SEC) #error "Coldfire SEC doesn't currently support AES-CTR mode" - #elif defined(FREESCALE_MMCAU) - #error "Freescale mmCAU doesn't currently support AES-CTR mode" + #elif defined(FREESCALE_LTC) + int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + { + uint32_t keySize; + byte *iv, *enc_key; + byte* tmp; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } + + if (sz) { + iv = (byte*)aes->reg; + enc_key = (byte*)aes->key; + + wc_AesGetKeySize(aes, &keySize); + + LTC_AES_CryptCtr(LTC_BASE, in, out, sz, + iv, enc_key, keySize, (byte*)aes->tmp, + (uint32_t*)&aes->left); + } + + return 0; + } + + #elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ + + #elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + + #elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + + #elif defined(WOLFSSL_ESP32WROOM32_CRYPT) && \ + !defined(NO_WOLFSSL_ESP32WROOM32_CRYPT_AES) + /* esp32 doesn't support CRT mode by hw. 
*/ + /* use aes ecnryption plus sw implementation */ + #define NEED_AES_CTR_SOFT #else + + /* Use software based AES counter */ + #define NEED_AES_CTR_SOFT + #endif + + #ifdef NEED_AES_CTR_SOFT /* Increment AES counter */ - static INLINE void IncrementAesCounter(byte* inOutCtr) + static WC_INLINE void IncrementAesCounter(byte* inOutCtr) { - int i; - /* in network byte order so start at end and work back */ + int i; for (i = AES_BLOCK_SIZE - 1; i >= 0; i--) { if (++inOutCtr[i]) /* we're done unless we overflow */ return; } } - void wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) + /* Software AES - CTR Encrypt */ + int wc_AesCtrEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - byte* tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + byte* tmp; + byte scratch[AES_BLOCK_SIZE]; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; while (aes->left && sz) { *(out++) = *(in++) ^ *(tmp++); aes->left--; @@ -2621,17 +3941,23 @@ int wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, /* do as many block size ops as possible */ while (sz >= AES_BLOCK_SIZE) { - wc_AesEncrypt(aes, (byte*)aes->reg, out); + #ifdef XTRANSFORM_AESCTRBLOCK + XTRANSFORM_AESCTRBLOCK(aes, out, in); + #else + wc_AesEncrypt(aes, (byte*)aes->reg, scratch); + xorbuf(scratch, in, AES_BLOCK_SIZE); + XMEMCPY(out, scratch, AES_BLOCK_SIZE); + #endif IncrementAesCounter((byte*)aes->reg); - xorbuf(out, in, AES_BLOCK_SIZE); out += AES_BLOCK_SIZE; in += AES_BLOCK_SIZE; sz -= AES_BLOCK_SIZE; aes->left = 0; } + ForceZero(scratch, AES_BLOCK_SIZE); - /* handle non block size remaining and sotre unused byte count in left */ + /* handle non block size remaining and store unused byte count in left */ if (sz) { wc_AesEncrypt(aes, (byte*)aes->reg, (byte*)aes->tmp); IncrementAesCounter((byte*)aes->reg); @@ -2644,49 +3970,63 @@ int 
wc_AesCbcDecryptWithKey(byte* out, const byte* in, word32 inSz, aes->left--; } } + + return 0; } - #endif /* STM32F2_CRYPTO, AES-CTR block */ + #endif /* NEED_AES_CTR_SOFT */ #endif /* WOLFSSL_AES_COUNTER */ +#endif /* !WOLFSSL_ARMASM */ -#ifdef HAVE_AESGCM /* - * The IV for AES GCM, stored in struct Aes's member reg, is comprised of - * three parts in order: - * 1. The implicit IV. This is generated from the PRF using the shared - * secrets between endpoints. It is 4 bytes long. - * 2. The explicit IV. This is set by the user of the AES. It needs to be - * unique for each call to encrypt. The explicit IV is shared with the - * other end of the transaction in the clear. - * 3. The counter. Each block of data is encrypted with its own sequence - * number counter. + * The IV for AES GCM and CCM, stored in struct Aes's member reg, is comprised + * of two parts in order: + * 1. The fixed field which may be 0 or 4 bytes long. In TLS, this is set + * to the implicit IV. + * 2. The explicit IV is generated by wolfCrypt. It needs to be managed + * by wolfCrypt to ensure the IV is unique for each call to encrypt. + * The IV may be a 96-bit random value, or the 32-bit fixed value and a + * 64-bit set of 0 or random data. The final 32-bits of reg is used as a + * block counter during the encryption. 
*/ -#ifdef STM32F2_CRYPTO - #error "STM32F2 crypto doesn't currently support AES-GCM mode" +#if (defined(HAVE_AESGCM) && !defined(WC_NO_RNG)) || defined(HAVE_AESCCM) +static WC_INLINE void IncCtr(byte* ctr, word32 ctrSz) +{ + int i; + for (i = ctrSz-1; i >= 0; i--) { + if (++ctr[i]) + break; + } +} +#endif /* HAVE_AESGCM || HAVE_AESCCM */ -#elif defined(HAVE_COLDFIRE_SEC) + +#ifdef HAVE_AESGCM + +#if defined(HAVE_COLDFIRE_SEC) #error "Coldfire SEC doesn't currently support AES-GCM mode" +#elif defined(WOLFSSL_NRF51_AES) + #error "nRF51 doesn't currently support AES-GCM mode" + #endif -enum { - CTR_SZ = 4 -}; +#ifdef WOLFSSL_ARMASM + /* implementation is located in wolfcrypt/src/port/arm/armv8-aes.c */ +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/afalg/afalg_aes.c */ -static INLINE void InitGcmCounter(byte* inOutCtr) -{ - inOutCtr[AES_BLOCK_SIZE - 4] = 0; - inOutCtr[AES_BLOCK_SIZE - 3] = 0; - inOutCtr[AES_BLOCK_SIZE - 2] = 0; - inOutCtr[AES_BLOCK_SIZE - 1] = 1; -} +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ +#else /* software + AESNI implementation */ -static INLINE void IncrementGcmCounter(byte* inOutCtr) +#if !defined(FREESCALE_LTC_AES_GCM) +static WC_INLINE void IncrementGcmCounter(byte* inOutCtr) { int i; @@ -2696,11 +4036,23 @@ static INLINE void IncrementGcmCounter(byte* inOutCtr) return; } } +#ifdef STM32_CRYPTO_AES_GCM +static WC_INLINE void DecrementGcmCounter(byte* inOutCtr) +{ + int i; + /* in network byte order so start at end and work back */ + for (i = AES_BLOCK_SIZE - 1; i >= AES_BLOCK_SIZE - CTR_SZ; i--) { + if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */ + return; + } +} +#endif /* STM32_CRYPTO_AES_GCM */ +#endif /* !FREESCALE_LTC_AES_GCM */ #if defined(GCM_SMALL) || defined(GCM_TABLE) -static INLINE void FlattenSzInBits(byte* buf, word32 sz) +static WC_INLINE void FlattenSzInBits(byte* buf, word32 sz) { /* Multiply the sz by 8 */ word32 szHi = (sz 
>> (8*sizeof(sz) - 3)); @@ -2718,7 +4070,7 @@ static INLINE void FlattenSzInBits(byte* buf, word32 sz) } -static INLINE void RIGHTSHIFTX(byte* x) +static WC_INLINE void RIGHTSHIFTX(byte* x) { int i; int carryOut = 0; @@ -2762,39 +4114,1250 @@ static void GenerateM0(Aes* aes) #endif /* GCM_TABLE */ - +/* Software AES - GCM SetKey */ int wc_AesGcmSetKey(Aes* aes, const byte* key, word32 len) { int ret; byte iv[AES_BLOCK_SIZE]; - #ifdef FREESCALE_MMCAU - byte* rk = (byte*)aes->key; + #ifdef WOLFSSL_IMX6_CAAM_BLOB + byte local[32]; + word32 localSz = 32; + + if (len == (16 + WC_CAAM_BLOB_SZ) || + len == (24 + WC_CAAM_BLOB_SZ) || + len == (32 + WC_CAAM_BLOB_SZ)) { + if (wc_caamOpenBlob((byte*)key, len, local, &localSz) != 0) { + return BAD_FUNC_ARG; + } + + /* set local values */ + key = local; + len = localSz; + } #endif if (!((len == 16) || (len == 24) || (len == 32))) return BAD_FUNC_ARG; +#ifdef OPENSSL_EXTRA + if (aes != NULL) { + XMEMSET(aes->aadH, 0, sizeof(aes->aadH)); + aes->aadLen = 0; + } +#endif XMEMSET(iv, 0, AES_BLOCK_SIZE); ret = wc_AesSetKey(aes, key, len, iv, AES_ENCRYPTION); + #ifdef WOLFSSL_AESNI + /* AES-NI code generates its own H value. 
*/ + if (haveAESNI) + return ret; + #endif /* WOLFSSL_AESNI */ + +#if !defined(FREESCALE_LTC_AES_GCM) if (ret == 0) { - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(iv, rk, aes->rounds, aes->H); - #else wc_AesEncrypt(aes, iv, aes->H); - #endif #ifdef GCM_TABLE GenerateM0(aes); #endif /* GCM_TABLE */ } +#endif /* FREESCALE_LTC_AES_GCM */ + +#if defined(WOLFSSL_XILINX_CRYPT) + wc_AesGcmSetKey_ex(aes, key, len, XSECURE_CSU_AES_KEY_SRC_KUP); +#elif defined(WOLFSSL_AFALG_XILINX_AES) + wc_AesGcmSetKey_ex(aes, key, len, 0); +#endif + +#ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + XMEMCPY(aes->devKey, key, len); + } +#endif + +#ifdef WOLFSSL_IMX6_CAAM_BLOB + ForceZero(local, sizeof(local)); +#endif return ret; } -#if defined(GCM_SMALL) +#ifdef WOLFSSL_AESNI + +#if defined(USE_INTEL_SPEEDUP) + #define HAVE_INTEL_AVX1 + #define HAVE_INTEL_AVX2 +#endif /* USE_INTEL_SPEEDUP */ + +#ifndef _MSC_VER + +void AES_GCM_encrypt(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, int nr) + XASM_LINK("AES_GCM_encrypt"); +#ifdef HAVE_INTEL_AVX1 +void AES_GCM_encrypt_avx1(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, + int nr) + XASM_LINK("AES_GCM_encrypt_avx1"); +#ifdef HAVE_INTEL_AVX2 +void AES_GCM_encrypt_avx2(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, const unsigned char* key, + int nr) + XASM_LINK("AES_GCM_encrypt_avx2"); +#endif /* HAVE_INTEL_AVX2 */ +#endif /* HAVE_INTEL_AVX1 */ + +#ifdef HAVE_AES_DECRYPT +void AES_GCM_decrypt(const 
unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, int nr, + int* res) + XASM_LINK("AES_GCM_decrypt"); +#ifdef HAVE_INTEL_AVX1 +void AES_GCM_decrypt_avx1(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, + int nr, int* res) + XASM_LINK("AES_GCM_decrypt_avx1"); +#ifdef HAVE_INTEL_AVX2 +void AES_GCM_decrypt_avx2(const unsigned char *in, unsigned char *out, + const unsigned char* addt, const unsigned char* ivec, + const unsigned char *tag, int nbytes, int abytes, + int ibytes, int tbytes, const unsigned char* key, + int nr, int* res) + XASM_LINK("AES_GCM_decrypt_avx2"); +#endif /* HAVE_INTEL_AVX2 */ +#endif /* HAVE_INTEL_AVX1 */ +#endif /* HAVE_AES_DECRYPT */ + +#else /* _MSC_VER */ + +#define S(w,z) ((char)((unsigned long long)(w) >> (8*(7-(z))) & 0xFF)) +#define M128_INIT(x,y) { S((x),7), S((x),6), S((x),5), S((x),4), \ + S((x),3), S((x),2), S((x),1), S((x),0), \ + S((y),7), S((y),6), S((y),5), S((y),4), \ + S((y),3), S((y),2), S((y),1), S((y),0) } + +static const __m128i MOD2_128 = + M128_INIT(0x1, (long long int)0xc200000000000000UL); + + +/* See Intel® Carry-Less Multiplication Instruction + * and its Usage for Computing the GCM Mode White Paper + * by Shay Gueron, Intel Mobility Group, Israel Development Center; + * and Michael E. Kounavis, Intel Labs, Circuits and Systems Research */ + + +/* Figure 9. 
AES-GCM – Encrypt With Single Block Ghash at a Time */ + +static const __m128i ONE = M128_INIT(0x0, 0x1); +#ifndef AES_GCM_AESNI_NO_UNROLL +static const __m128i TWO = M128_INIT(0x0, 0x2); +static const __m128i THREE = M128_INIT(0x0, 0x3); +static const __m128i FOUR = M128_INIT(0x0, 0x4); +static const __m128i FIVE = M128_INIT(0x0, 0x5); +static const __m128i SIX = M128_INIT(0x0, 0x6); +static const __m128i SEVEN = M128_INIT(0x0, 0x7); +static const __m128i EIGHT = M128_INIT(0x0, 0x8); +#endif +static const __m128i BSWAP_EPI64 = + M128_INIT(0x0001020304050607, 0x08090a0b0c0d0e0f); +static const __m128i BSWAP_MASK = + M128_INIT(0x08090a0b0c0d0e0f, 0x0001020304050607); + + +/* The following are for MSC based builds which do not allow + * inline assembly. Intrinsic functions are used instead. */ + +#define aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T) \ +do \ +{ \ + word32 iv12[4]; \ + iv12[0] = *(word32*)&ivec[0]; \ + iv12[1] = *(word32*)&ivec[4]; \ + iv12[2] = *(word32*)&ivec[8]; \ + iv12[3] = 0x01000000; \ + Y = _mm_loadu_si128((__m128i*)iv12); \ + \ + /* (Compute E[ZERO, KS] and E[Y0, KS] together */ \ + tmp1 = _mm_load_si128(&KEY[0]); \ + tmp2 = _mm_xor_si128(Y, KEY[0]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[1]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[2]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[3]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[4]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[5]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[6]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[7]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[8]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[9]); \ + lastKey = KEY[10]; \ + if (nr > 
10) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp2 = _mm_aesenc_si128(tmp2, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[11]); \ + lastKey = KEY[12]; \ + if (nr > 12) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp2 = _mm_aesenc_si128(tmp2, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[13]); \ + lastKey = KEY[14]; \ + } \ + } \ + H = _mm_aesenclast_si128(tmp1, lastKey); \ + T = _mm_aesenclast_si128(tmp2, lastKey); \ + H = _mm_shuffle_epi8(H, BSWAP_MASK); \ +} \ +while (0) + +#define aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T) \ +do \ +{ \ + if (ibytes % 16) { \ + i = ibytes / 16; \ + for (j=0; j < (int)(ibytes%16); j++) \ + ((unsigned char*)&last_block)[j] = ivec[i*16+j]; \ + } \ + tmp1 = _mm_load_si128(&KEY[0]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \ + lastKey = KEY[10]; \ + if (nr > 10) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \ + lastKey = KEY[12]; \ + if (nr > 12) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \ + lastKey = KEY[14]; \ + } \ + } \ + H = _mm_aesenclast_si128(tmp1, lastKey); \ + H = _mm_shuffle_epi8(H, BSWAP_MASK); \ + Y = _mm_setzero_si128(); \ + for (i=0; i < (int)(ibytes/16); i++) { \ + tmp1 = _mm_loadu_si128(&((__m128i*)ivec)[i]); \ + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); \ + Y = _mm_xor_si128(Y, tmp1); \ + Y = gfmul_sw(Y, H); \ + } \ + if (ibytes % 16) { \ + tmp1 = last_block; \ + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); \ + Y = _mm_xor_si128(Y, tmp1); \ + Y = gfmul_sw(Y, H); \ + } \ + tmp1 = 
_mm_insert_epi64(tmp1, ibytes*8, 0); \ + tmp1 = _mm_insert_epi64(tmp1, 0, 1); \ + Y = _mm_xor_si128(Y, tmp1); \ + Y = gfmul_sw(Y, H); \ + Y = _mm_shuffle_epi8(Y, BSWAP_MASK); /* Compute E(K, Y0) */ \ + tmp1 = _mm_xor_si128(Y, KEY[0]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); \ + lastKey = KEY[10]; \ + if (nr > 10) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); \ + lastKey = KEY[12]; \ + if (nr > 12) { \ + tmp1 = _mm_aesenc_si128(tmp1, lastKey); \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); \ + lastKey = KEY[14]; \ + } \ + } \ + T = _mm_aesenclast_si128(tmp1, lastKey); \ +} \ +while (0) + +#define AES_ENC_8(j) \ + tmp1 = _mm_aesenc_si128(tmp1, KEY[j]); \ + tmp2 = _mm_aesenc_si128(tmp2, KEY[j]); \ + tmp3 = _mm_aesenc_si128(tmp3, KEY[j]); \ + tmp4 = _mm_aesenc_si128(tmp4, KEY[j]); \ + tmp5 = _mm_aesenc_si128(tmp5, KEY[j]); \ + tmp6 = _mm_aesenc_si128(tmp6, KEY[j]); \ + tmp7 = _mm_aesenc_si128(tmp7, KEY[j]); \ + tmp8 = _mm_aesenc_si128(tmp8, KEY[j]); + +#define AES_ENC_LAST_8() \ + tmp1 =_mm_aesenclast_si128(tmp1, lastKey); \ + tmp2 =_mm_aesenclast_si128(tmp2, lastKey); \ + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[i*8+0])); \ + tmp2 = _mm_xor_si128(tmp2, _mm_loadu_si128(&((__m128i*)in)[i*8+1])); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+0], tmp1); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+1], tmp2); \ + tmp3 =_mm_aesenclast_si128(tmp3, lastKey); \ + tmp4 =_mm_aesenclast_si128(tmp4, lastKey); \ + tmp3 = _mm_xor_si128(tmp3, _mm_loadu_si128(&((__m128i*)in)[i*8+2])); \ + tmp4 = _mm_xor_si128(tmp4, _mm_loadu_si128(&((__m128i*)in)[i*8+3])); \ + 
_mm_storeu_si128(&((__m128i*)out)[i*8+2], tmp3); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+3], tmp4); \ + tmp5 =_mm_aesenclast_si128(tmp5, lastKey); \ + tmp6 =_mm_aesenclast_si128(tmp6, lastKey); \ + tmp5 = _mm_xor_si128(tmp5, _mm_loadu_si128(&((__m128i*)in)[i*8+4])); \ + tmp6 = _mm_xor_si128(tmp6, _mm_loadu_si128(&((__m128i*)in)[i*8+5])); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+4], tmp5); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+5], tmp6); \ + tmp7 =_mm_aesenclast_si128(tmp7, lastKey); \ + tmp8 =_mm_aesenclast_si128(tmp8, lastKey); \ + tmp7 = _mm_xor_si128(tmp7, _mm_loadu_si128(&((__m128i*)in)[i*8+6])); \ + tmp8 = _mm_xor_si128(tmp8, _mm_loadu_si128(&((__m128i*)in)[i*8+7])); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+6], tmp7); \ + _mm_storeu_si128(&((__m128i*)out)[i*8+7], tmp8); + + +static __m128i gfmul_sw(__m128i a, __m128i b) +{ + __m128i r, t1, t2, t3, t4, t5, t6, t7; + t2 = _mm_shuffle_epi32(b, 78); + t3 = _mm_shuffle_epi32(a, 78); + t2 = _mm_xor_si128(t2, b); + t3 = _mm_xor_si128(t3, a); + t4 = _mm_clmulepi64_si128(b, a, 0x11); + t1 = _mm_clmulepi64_si128(b, a, 0x00); + t2 = _mm_clmulepi64_si128(t2, t3, 0x00); + t2 = _mm_xor_si128(t2, t1); + t2 = _mm_xor_si128(t2, t4); + t3 = _mm_slli_si128(t2, 8); + t2 = _mm_srli_si128(t2, 8); + t1 = _mm_xor_si128(t1, t3); + t4 = _mm_xor_si128(t4, t2); + + t5 = _mm_srli_epi32(t1, 31); + t6 = _mm_srli_epi32(t4, 31); + t1 = _mm_slli_epi32(t1, 1); + t4 = _mm_slli_epi32(t4, 1); + t7 = _mm_srli_si128(t5, 12); + t5 = _mm_slli_si128(t5, 4); + t6 = _mm_slli_si128(t6, 4); + t4 = _mm_or_si128(t4, t7); + t1 = _mm_or_si128(t1, t5); + t4 = _mm_or_si128(t4, t6); + + t5 = _mm_slli_epi32(t1, 31); + t6 = _mm_slli_epi32(t1, 30); + t7 = _mm_slli_epi32(t1, 25); + t5 = _mm_xor_si128(t5, t6); + t5 = _mm_xor_si128(t5, t7); + + t6 = _mm_srli_si128(t5, 4); + t5 = _mm_slli_si128(t5, 12); + t1 = _mm_xor_si128(t1, t5); + t7 = _mm_srli_epi32(t1, 1); + t3 = _mm_srli_epi32(t1, 2); + t2 = _mm_srli_epi32(t1, 7); + + t7 = _mm_xor_si128(t7, t3); + t7 = 
_mm_xor_si128(t7, t2); + t7 = _mm_xor_si128(t7, t6); + t7 = _mm_xor_si128(t7, t1); + r = _mm_xor_si128(t4, t7); + + return r; +} + +static void gfmul_only(__m128i a, __m128i b, __m128i* r0, __m128i* r1) +{ + __m128i t1, t2, t3, t4; + + /* 128 x 128 Carryless Multiply */ + t2 = _mm_shuffle_epi32(b, 78); + t3 = _mm_shuffle_epi32(a, 78); + t2 = _mm_xor_si128(t2, b); + t3 = _mm_xor_si128(t3, a); + t4 = _mm_clmulepi64_si128(b, a, 0x11); + t1 = _mm_clmulepi64_si128(b, a, 0x00); + t2 = _mm_clmulepi64_si128(t2, t3, 0x00); + t2 = _mm_xor_si128(t2, t1); + t2 = _mm_xor_si128(t2, t4); + t3 = _mm_slli_si128(t2, 8); + t2 = _mm_srli_si128(t2, 8); + t1 = _mm_xor_si128(t1, t3); + t4 = _mm_xor_si128(t4, t2); + *r0 = _mm_xor_si128(t1, *r0); + *r1 = _mm_xor_si128(t4, *r1); +} + +static __m128i gfmul_shl1(__m128i a) +{ + __m128i t1 = a, t2; + t2 = _mm_srli_epi64(t1, 63); + t1 = _mm_slli_epi64(t1, 1); + t2 = _mm_slli_si128(t2, 8); + t1 = _mm_or_si128(t1, t2); + /* if (a[1] >> 63) t1 = _mm_xor_si128(t1, MOD2_128); */ + a = _mm_shuffle_epi32(a, 0xff); + a = _mm_srai_epi32(a, 31); + a = _mm_and_si128(a, MOD2_128); + t1 = _mm_xor_si128(t1, a); + return t1; +} + +static __m128i ghash_red(__m128i r0, __m128i r1) +{ + __m128i t2, t3; + __m128i t5, t6, t7; + + t5 = _mm_slli_epi32(r0, 31); + t6 = _mm_slli_epi32(r0, 30); + t7 = _mm_slli_epi32(r0, 25); + t5 = _mm_xor_si128(t5, t6); + t5 = _mm_xor_si128(t5, t7); + + t6 = _mm_srli_si128(t5, 4); + t5 = _mm_slli_si128(t5, 12); + r0 = _mm_xor_si128(r0, t5); + t7 = _mm_srli_epi32(r0, 1); + t3 = _mm_srli_epi32(r0, 2); + t2 = _mm_srli_epi32(r0, 7); + + t7 = _mm_xor_si128(t7, t3); + t7 = _mm_xor_si128(t7, t2); + t7 = _mm_xor_si128(t7, t6); + t7 = _mm_xor_si128(t7, r0); + return _mm_xor_si128(r1, t7); +} + +static __m128i gfmul_shifted(__m128i a, __m128i b) +{ + __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128(); + gfmul_only(a, b, &t0, &t1); + return ghash_red(t0, t1); +} + +#ifndef AES_GCM_AESNI_NO_UNROLL +static __m128i gfmul8(__m128i a1, __m128i 
a2, __m128i a3, __m128i a4, + __m128i a5, __m128i a6, __m128i a7, __m128i a8, + __m128i b1, __m128i b2, __m128i b3, __m128i b4, + __m128i b5, __m128i b6, __m128i b7, __m128i b8) +{ + __m128i t0 = _mm_setzero_si128(), t1 = _mm_setzero_si128(); + gfmul_only(a1, b8, &t0, &t1); + gfmul_only(a2, b7, &t0, &t1); + gfmul_only(a3, b6, &t0, &t1); + gfmul_only(a4, b5, &t0, &t1); + gfmul_only(a5, b4, &t0, &t1); + gfmul_only(a6, b3, &t0, &t1); + gfmul_only(a7, b2, &t0, &t1); + gfmul_only(a8, b1, &t0, &t1); + return ghash_red(t0, t1); +} +#endif + + +static void AES_GCM_encrypt(const unsigned char *in, + unsigned char *out, + const unsigned char* addt, + const unsigned char* ivec, + unsigned char *tag, unsigned int nbytes, + unsigned int abytes, unsigned int ibytes, + unsigned int tbytes, + const unsigned char* key, int nr) +{ + int i, j ,k; + __m128i ctr1; + __m128i H, Y, T; + __m128i X = _mm_setzero_si128(); + __m128i *KEY = (__m128i*)key, lastKey; + __m128i last_block = _mm_setzero_si128(); + __m128i tmp1, tmp2; +#ifndef AES_GCM_AESNI_NO_UNROLL + __m128i HT[8]; + __m128i r0, r1; + __m128i XV; + __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; +#endif + + if (ibytes == GCM_NONCE_MID_SZ) + aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T); + else + aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T); + + for (i=0; i < (int)(abytes/16); i++) { + tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + X = gfmul_sw(X, H); + } + if (abytes%16) { + last_block = _mm_setzero_si128(); + for (j=0; j < (int)(abytes%16); j++) + ((unsigned char*)&last_block)[j] = addt[i*16+j]; + tmp1 = last_block; + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X = _mm_xor_si128(X, tmp1); + X = gfmul_sw(X, H); + } + tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64); + ctr1 = _mm_add_epi32(tmp1, ONE); + H = gfmul_shl1(H); + +#ifndef AES_GCM_AESNI_NO_UNROLL + i = 0; + if (nbytes >= 16*8) { + HT[0] = H; + HT[1] = gfmul_shifted(H, H); + HT[2] = gfmul_shifted(H, HT[1]); + 
HT[3] = gfmul_shifted(HT[1], HT[1]); + HT[4] = gfmul_shifted(HT[1], HT[2]); + HT[5] = gfmul_shifted(HT[2], HT[2]); + HT[6] = gfmul_shifted(HT[2], HT[3]); + HT[7] = gfmul_shifted(HT[3], HT[3]); + + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_add_epi32(ctr1, ONE); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64); + tmp3 = _mm_add_epi32(ctr1, TWO); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64); + tmp4 = _mm_add_epi32(ctr1, THREE); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64); + tmp5 = _mm_add_epi32(ctr1, FOUR); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64); + tmp6 = _mm_add_epi32(ctr1, FIVE); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64); + tmp7 = _mm_add_epi32(ctr1, SIX); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64); + tmp8 = _mm_add_epi32(ctr1, SEVEN); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, EIGHT); + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + AES_ENC_8(1); + AES_ENC_8(2); + AES_ENC_8(3); + AES_ENC_8(4); + AES_ENC_8(5); + AES_ENC_8(6); + AES_ENC_8(7); + AES_ENC_8(8); + AES_ENC_8(9); + lastKey = KEY[10]; + if (nr > 10) { + AES_ENC_8(10); + AES_ENC_8(11); + lastKey = KEY[12]; + if (nr > 12) { + AES_ENC_8(12); + AES_ENC_8(13); + lastKey = KEY[14]; + } + } + AES_ENC_LAST_8(); + + for (i=1; i < (int)(nbytes/16/8); i++) { + r0 = _mm_setzero_si128(); + r1 = _mm_setzero_si128(); + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + tmp2 = _mm_add_epi32(ctr1, ONE); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64); + tmp3 = _mm_add_epi32(ctr1, TWO); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64); + tmp4 = _mm_add_epi32(ctr1, THREE); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64); + tmp5 = _mm_add_epi32(ctr1, FOUR); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64); + tmp6 = _mm_add_epi32(ctr1, FIVE); + 
tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64); + tmp7 = _mm_add_epi32(ctr1, SIX); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64); + tmp8 = _mm_add_epi32(ctr1, SEVEN); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, EIGHT); + tmp1 =_mm_xor_si128(tmp1, KEY[0]); + tmp2 =_mm_xor_si128(tmp2, KEY[0]); + tmp3 =_mm_xor_si128(tmp3, KEY[0]); + tmp4 =_mm_xor_si128(tmp4, KEY[0]); + tmp5 =_mm_xor_si128(tmp5, KEY[0]); + tmp6 =_mm_xor_si128(tmp6, KEY[0]); + tmp7 =_mm_xor_si128(tmp7, KEY[0]); + tmp8 =_mm_xor_si128(tmp8, KEY[0]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+0]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + XV = _mm_xor_si128(XV, X); + gfmul_only(XV, HT[7], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[1]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[1]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[1]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[1]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[1]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[1]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[1]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+1]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[6], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[2]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[2]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[2]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[2]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[2]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[2]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[2]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+2]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[5], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[3]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[3]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[3]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[3]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[3]); + tmp7 = 
_mm_aesenc_si128(tmp7, KEY[3]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[3]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+3]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[4], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[4]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[4]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[4]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[4]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[4]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[4]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[4]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+4]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[3], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[5]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[5]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[5]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[5]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[5]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[5]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[5]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+5]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[2], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[6]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[6]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[6]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[6]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[6]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[6]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[6]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+6]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[1], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[7]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[7]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[7]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[7]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[7]); + tmp7 = _mm_aesenc_si128(tmp7, 
KEY[7]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[7]); + /* 128 x 128 Carryless Multiply */ + XV = _mm_loadu_si128(&((__m128i*)out)[(i-1)*8+7]); + XV = _mm_shuffle_epi8(XV, BSWAP_MASK); + gfmul_only(XV, HT[0], &r0, &r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[8]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[8]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[8]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[8]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[8]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[8]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[8]); + /* Reduction */ + X = ghash_red(r0, r1); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[9]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[9]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[9]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[9]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[9]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[9]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[10]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[10]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[10]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[10]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[10]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[10]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[10]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[10]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[11]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[11]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[11]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[11]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[11]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[11]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, KEY[12]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[12]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[12]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[12]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[12]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[12]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[12]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[12]); 
+ tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + tmp2 = _mm_aesenc_si128(tmp2, KEY[13]); + tmp3 = _mm_aesenc_si128(tmp3, KEY[13]); + tmp4 = _mm_aesenc_si128(tmp4, KEY[13]); + tmp5 = _mm_aesenc_si128(tmp5, KEY[13]); + tmp6 = _mm_aesenc_si128(tmp6, KEY[13]); + tmp7 = _mm_aesenc_si128(tmp7, KEY[13]); + tmp8 = _mm_aesenc_si128(tmp8, KEY[13]); + lastKey = KEY[14]; + } + } + AES_ENC_LAST_8(); + } + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_MASK); + tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_MASK); + tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_MASK); + tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_MASK); + tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_MASK); + tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_MASK); + tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_MASK); + tmp1 = _mm_xor_si128(X, tmp1); + X = gfmul8(tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8, + HT[0], HT[1], HT[2], HT[3], HT[4], HT[5], HT[6], HT[7]); + } + for (k = i*8; k < (int)(nbytes/16); k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X, tmp1); + X = gfmul_shifted(X, H); + } +#else /* 
AES_GCM_AESNI_NO_UNROLL */ + for (k = 0; k < (int)(nbytes/16) && k < 1; k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + tmp1 = _mm_xor_si128(tmp1, _mm_loadu_si128(&((__m128i*)in)[k])); + _mm_storeu_si128(&((__m128i*)out)[k], tmp1); + tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK); + X =_mm_xor_si128(X, tmp1); + } + for (; k < (int)(nbytes/16); k++) { + tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64); + ctr1 = _mm_add_epi32(ctr1, ONE); + tmp1 = _mm_xor_si128(tmp1, KEY[0]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[1]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[2]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[3]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[4]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[5]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[6]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[7]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[8]); + tmp1 = _mm_aesenc_si128(tmp1, KEY[9]); + X = gfmul_shifted(X, H); + lastKey = KEY[10]; + if (nr > 10) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[11]); + lastKey = KEY[12]; + if (nr > 12) { + tmp1 = _mm_aesenc_si128(tmp1, lastKey); + tmp1 = _mm_aesenc_si128(tmp1, KEY[13]); + lastKey = KEY[14]; + } + } + tmp1 = _mm_aesenclast_si128(tmp1, lastKey); + tmp1 = _mm_xor_si128(tmp1, 
#ifdef HAVE_AES_DECRYPT

/* AES-GCM decrypt and tag check using AES-NI / PCLMUL intrinsics.
 *
 * in      ciphertext, nbytes long
 * out     receives the plaintext, nbytes long
 * addt    additional authenticated data, abytes long
 * ivec    IV / nonce, ibytes long (GCM_NONCE_MID_SZ selects the
 *         aes_gcm_calc_iv_12 setup, anything else aes_gcm_calc_iv)
 * tag     expected authentication tag, tbytes long
 * key     expanded AES key schedule, read as an array of __m128i
 * nr      number of AES rounds; values above 10 and above 12 enable the
 *         extra round pairs
 * res     set to 1 when the computed tag matches, 0 otherwise
 *
 * The plaintext is stored to `out` as it is produced, i.e. before the tag
 * has been compared; callers must not use `out` when *res == 0.
 *
 * The local names (i, j, tmp1.., lastKey, X, last_block, in, out) are kept
 * as-is because helper macros such as AES_ENC_LAST_8() and the
 * aes_gcm_calc_iv* setup are expanded in this scope and presumably refer to
 * them by name.
 */
static void AES_GCM_decrypt(const unsigned char *in,
                            unsigned char *out,
                            const unsigned char* addt,
                            const unsigned char* ivec,
                            const unsigned char *tag, int nbytes, int abytes,
                            int ibytes, word32 tbytes, const unsigned char* key,
                            int nr, int* res)
{
    int i, j, k;
    __m128i H, Y, T;
    __m128i *KEY = (__m128i*)key, lastKey;
    __m128i ctr1;
    __m128i last_block = _mm_setzero_si128();
    __m128i X = _mm_setzero_si128();
    __m128i tmp1, tmp2, XV;
#ifndef AES_GCM_AESNI_NO_UNROLL
    __m128i HT[8];
    __m128i r0, r1;
    __m128i tmp3, tmp4, tmp5, tmp6, tmp7, tmp8;
#endif /* AES_GCM_AESNI_NO_UNROLL */

    /* Derive H, Y and T from the key schedule and IV. */
    if (ibytes == GCM_NONCE_MID_SZ)
        aes_gcm_calc_iv_12(KEY, ivec, nr, H, Y, T);
    else
        aes_gcm_calc_iv(KEY, ivec, ibytes, nr, H, Y, T);

    /* Hash in the full blocks of additional data. */
    for (i=0; i<abytes/16; i++) {
        tmp1 = _mm_loadu_si128(&((__m128i*)addt)[i]);
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        X = gfmul_sw(X, H);
    }
    /* Hash in the zero-padded trailing partial block of additional data. */
    if (abytes%16) {
        last_block = _mm_setzero_si128();
        for (j=0; j<abytes%16; j++)
            ((unsigned char*)&last_block)[j] = addt[i*16+j];
        tmp1 = last_block;
        tmp1 = _mm_shuffle_epi8(tmp1, BSWAP_MASK);
        X = _mm_xor_si128(X, tmp1);
        X = gfmul_sw(X, H);
    }

    /* First data counter is Y + 1. */
    tmp1 = _mm_shuffle_epi8(Y, BSWAP_EPI64);
    ctr1 = _mm_add_epi32(tmp1, ONE);
    H = gfmul_shl1(H);
    i = 0;

#ifndef AES_GCM_AESNI_NO_UNROLL

    /* Process eight blocks per iteration while at least eight remain. */
    if (0 < nbytes/16/8) {
        /* Table of products of H used by the 8-way GHASH below. */
        HT[0] = H;
        HT[1] = gfmul_shifted(H, H);
        HT[2] = gfmul_shifted(H, HT[1]);
        HT[3] = gfmul_shifted(HT[1], HT[1]);
        HT[4] = gfmul_shifted(HT[1], HT[2]);
        HT[5] = gfmul_shifted(HT[2], HT[2]);
        HT[6] = gfmul_shifted(HT[2], HT[3]);
        HT[7] = gfmul_shifted(HT[3], HT[3]);

        for (; i < nbytes/16/8; i++) {
            r0 = _mm_setzero_si128();
            r1 = _mm_setzero_si128();

            /* Eight consecutive counter blocks. */
            tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
            tmp2 = _mm_add_epi32(ctr1, ONE);
            tmp2 = _mm_shuffle_epi8(tmp2, BSWAP_EPI64);
            tmp3 = _mm_add_epi32(ctr1, TWO);
            tmp3 = _mm_shuffle_epi8(tmp3, BSWAP_EPI64);
            tmp4 = _mm_add_epi32(ctr1, THREE);
            tmp4 = _mm_shuffle_epi8(tmp4, BSWAP_EPI64);
            tmp5 = _mm_add_epi32(ctr1, FOUR);
            tmp5 = _mm_shuffle_epi8(tmp5, BSWAP_EPI64);
            tmp6 = _mm_add_epi32(ctr1, FIVE);
            tmp6 = _mm_shuffle_epi8(tmp6, BSWAP_EPI64);
            tmp7 = _mm_add_epi32(ctr1, SIX);
            tmp7 = _mm_shuffle_epi8(tmp7, BSWAP_EPI64);
            tmp8 = _mm_add_epi32(ctr1, SEVEN);
            tmp8 = _mm_shuffle_epi8(tmp8, BSWAP_EPI64);
            ctr1 = _mm_add_epi32(ctr1, EIGHT);
            tmp1 =_mm_xor_si128(tmp1, KEY[0]);
            tmp2 =_mm_xor_si128(tmp2, KEY[0]);
            tmp3 =_mm_xor_si128(tmp3, KEY[0]);
            tmp4 =_mm_xor_si128(tmp4, KEY[0]);
            tmp5 =_mm_xor_si128(tmp5, KEY[0]);
            tmp6 =_mm_xor_si128(tmp6, KEY[0]);
            tmp7 =_mm_xor_si128(tmp7, KEY[0]);
            tmp8 =_mm_xor_si128(tmp8, KEY[0]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+0]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            XV = _mm_xor_si128(XV, X);
            gfmul_only(XV, HT[7], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[1]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[1]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[1]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[1]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[1]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[1]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[1]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+1]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[6], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[2]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[2]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[2]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[2]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[2]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[2]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[2]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+2]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[5], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[3]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[3]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[3]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[3]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[3]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[3]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[3]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+3]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[4], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[4]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[4]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[4]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[4]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[4]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[4]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[4]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+4]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[3], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[5]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[5]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[5]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[5]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[5]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[5]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[5]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+5]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[2], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[6]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[6]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[6]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[6]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[6]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[6]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[6]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+6]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[1], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[7]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[7]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[7]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[7]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[7]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[7]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[7]);
            /* 128 x 128 Carryless Multiply */
            XV = _mm_loadu_si128(&((__m128i*)in)[i*8+7]);
            XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
            gfmul_only(XV, HT[0], &r0, &r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[8]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[8]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[8]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[8]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[8]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[8]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[8]);
            /* Reduction */
            X = ghash_red(r0, r1);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
            tmp2 = _mm_aesenc_si128(tmp2, KEY[9]);
            tmp3 = _mm_aesenc_si128(tmp3, KEY[9]);
            tmp4 = _mm_aesenc_si128(tmp4, KEY[9]);
            tmp5 = _mm_aesenc_si128(tmp5, KEY[9]);
            tmp6 = _mm_aesenc_si128(tmp6, KEY[9]);
            tmp7 = _mm_aesenc_si128(tmp7, KEY[9]);
            tmp8 = _mm_aesenc_si128(tmp8, KEY[9]);
            lastKey = KEY[10];
            if (nr > 10) {
                tmp1 = _mm_aesenc_si128(tmp1, KEY[10]);
                tmp2 = _mm_aesenc_si128(tmp2, KEY[10]);
                tmp3 = _mm_aesenc_si128(tmp3, KEY[10]);
                tmp4 = _mm_aesenc_si128(tmp4, KEY[10]);
                tmp5 = _mm_aesenc_si128(tmp5, KEY[10]);
                tmp6 = _mm_aesenc_si128(tmp6, KEY[10]);
                tmp7 = _mm_aesenc_si128(tmp7, KEY[10]);
                tmp8 = _mm_aesenc_si128(tmp8, KEY[10]);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
                tmp2 = _mm_aesenc_si128(tmp2, KEY[11]);
                tmp3 = _mm_aesenc_si128(tmp3, KEY[11]);
                tmp4 = _mm_aesenc_si128(tmp4, KEY[11]);
                tmp5 = _mm_aesenc_si128(tmp5, KEY[11]);
                tmp6 = _mm_aesenc_si128(tmp6, KEY[11]);
                tmp7 = _mm_aesenc_si128(tmp7, KEY[11]);
                tmp8 = _mm_aesenc_si128(tmp8, KEY[11]);
                lastKey = KEY[12];
                if (nr > 12) {
                    tmp1 = _mm_aesenc_si128(tmp1, KEY[12]);
                    tmp2 = _mm_aesenc_si128(tmp2, KEY[12]);
                    tmp3 = _mm_aesenc_si128(tmp3, KEY[12]);
                    tmp4 = _mm_aesenc_si128(tmp4, KEY[12]);
                    tmp5 = _mm_aesenc_si128(tmp5, KEY[12]);
                    tmp6 = _mm_aesenc_si128(tmp6, KEY[12]);
                    tmp7 = _mm_aesenc_si128(tmp7, KEY[12]);
                    tmp8 = _mm_aesenc_si128(tmp8, KEY[12]);
                    tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                    tmp2 = _mm_aesenc_si128(tmp2, KEY[13]);
                    tmp3 = _mm_aesenc_si128(tmp3, KEY[13]);
                    tmp4 = _mm_aesenc_si128(tmp4, KEY[13]);
                    tmp5 = _mm_aesenc_si128(tmp5, KEY[13]);
                    tmp6 = _mm_aesenc_si128(tmp6, KEY[13]);
                    tmp7 = _mm_aesenc_si128(tmp7, KEY[13]);
                    tmp8 = _mm_aesenc_si128(tmp8, KEY[13]);
                    lastKey = KEY[14];
                }
            }
            AES_ENC_LAST_8();
        }
    }

#endif /* AES_GCM_AESNI_NO_UNROLL */

    /* Remaining full blocks, one at a time. */
    for (k = i*8; k < nbytes/16; k++) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        ctr1 = _mm_add_epi32(ctr1, ONE);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
        /* 128 x 128 Carryless Multiply */
        XV = _mm_loadu_si128(&((__m128i*)in)[k]);
        XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
        XV = _mm_xor_si128(XV, X);
        X = gfmul_shifted(XV, H);
        lastKey = KEY[10];
        if (nr > 10) {
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
            lastKey = KEY[12];
            if (nr > 12) {
                tmp1 = _mm_aesenc_si128(tmp1, lastKey);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                lastKey = KEY[14];
            }
        }
        tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
        tmp2 = _mm_loadu_si128(&((__m128i*)in)[k]);
        tmp1 = _mm_xor_si128(tmp1, tmp2);
        _mm_storeu_si128(&((__m128i*)out)[k], tmp1);
    }

    /* If one partial block remains */
    if (nbytes % 16) {
        tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
        tmp1 = _mm_xor_si128(tmp1, KEY[0]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[1]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[2]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[3]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[4]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[5]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[6]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[7]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[8]);
        tmp1 = _mm_aesenc_si128(tmp1, KEY[9]);
        lastKey = KEY[10];
        if (nr > 10) {
            tmp1 = _mm_aesenc_si128(tmp1, lastKey);
            tmp1 = _mm_aesenc_si128(tmp1, KEY[11]);
            lastKey = KEY[12];
            if (nr > 12) {
                tmp1 = _mm_aesenc_si128(tmp1, lastKey);
                tmp1 = _mm_aesenc_si128(tmp1, KEY[13]);
                lastKey = KEY[14];
            }
        }
        tmp1 = _mm_aesenclast_si128(tmp1, lastKey);
        /* Zero-pad the trailing ciphertext bytes into a full block. */
        last_block = _mm_setzero_si128();
        for (j=0; j < nbytes%16; j++)
            ((unsigned char*)&last_block)[j] = in[k*16+j];
        XV = last_block;
        tmp1 = _mm_xor_si128(tmp1, last_block);
        last_block = tmp1;
        /* Only the valid plaintext bytes are written out. */
        for (j=0; j < nbytes%16; j++)
            out[k*16+j] = ((unsigned char*)&last_block)[j];
        XV = _mm_shuffle_epi8(XV, BSWAP_MASK);
        XV = _mm_xor_si128(XV, X);
        X = gfmul_shifted(XV, H);
    }

    /* Length block: bit lengths of the data and of the additional data.
     * Widen before multiplying; `nbytes*8` in int overflows (undefined
     * behaviour) once the input reaches 256 MiB. */
    tmp1 = _mm_insert_epi64(tmp1, ((word64)nbytes)*8, 0);
    tmp1 = _mm_insert_epi64(tmp1, ((word64)abytes)*8, 1);
    /* 128 x 128 Carryless Multiply */
    X = _mm_xor_si128(X, tmp1);
    X = gfmul_shifted(X, H);
    X = _mm_shuffle_epi8(X, BSWAP_MASK);
    T = _mm_xor_si128(X, T);

    /* Compare without an early exit so the time taken does not depend on
     * how many leading tag bytes match. */
    if (ConstantCompare(tag, (const byte*)&T, (int)tbytes) != 0)
        *res = 0; /* in case the authentication failed */
    else
        *res = 1; /* when successful returns 1 */
}

#endif /* HAVE_AES_DECRYPT */
word32 sSz) { byte x[AES_BLOCK_SIZE]; byte scratch[AES_BLOCK_SIZE]; @@ -3025,12 +5588,13 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz, /* end GCM_TABLE */ #elif defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) +#if !defined(FREESCALE_LTC_AES_GCM) static void GMULT(word64* X, word64* Y) { word64 Z[2] = {0,0}; - word64 V[2] ; + word64 V[2]; int i, j; - V[0] = X[0] ; V[1] = X[1] ; + V[0] = X[0]; V[1] = X[1]; for (i = 0; i < 2; i++) { @@ -3044,13 +5608,15 @@ static void GMULT(word64* X, word64* Y) if (V[1] & 0x0000000000000001) { V[1] >>= 1; - V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0); + V[1] |= ((V[0] & 0x0000000000000001) ? + 0x8000000000000000ULL : 0); V[0] >>= 1; V[0] ^= 0xE100000000000000ULL; } else { V[1] >>= 1; - V[1] |= ((V[0] & 0x0000000000000001) ? 0x8000000000000000ULL : 0); + V[1] |= ((V[0] & 0x0000000000000001) ? + 0x8000000000000000ULL : 0); V[0] >>= 1; } y <<= 1; @@ -3061,8 +5627,8 @@ static void GMULT(word64* X, word64* Y) } -static void GHASH(Aes* aes, const byte* a, word32 aSz, - const byte* c, word32 cSz, byte* s, word32 sSz) +void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) { word64 x[2] = {0,0}; word32 blocks, partial; @@ -3070,7 +5636,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz, XMEMCPY(bigH, aes->H, AES_BLOCK_SIZE); #ifdef LITTLE_ENDIAN_ORDER - ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE); + ByteReverseWords64(bigH, bigH, AES_BLOCK_SIZE); #endif /* Hash in A, the Additional Authentication Data */ @@ -3098,6 +5664,13 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz, x[1] ^= bigA[1]; GMULT(x, bigH); } +#ifdef OPENSSL_EXTRA + /* store AAD partial tag for next call */ + aes->aadH[0] = (word32)((x[0] & 0xFFFFFFFF00000000) >> 32); + aes->aadH[1] = (word32)(x[0] & 0xFFFFFFFF); + aes->aadH[2] = (word32)((x[1] & 0xFFFFFFFF00000000) >> 32); + aes->aadH[3] = (word32)(x[1] & 0xFFFFFFFF); +#endif } /* Hash in C, the Ciphertext */ @@ -3105,6 +5678,13 
@@ static void GHASH(Aes* aes, const byte* a, word32 aSz, word64 bigC[2]; blocks = cSz / AES_BLOCK_SIZE; partial = cSz % AES_BLOCK_SIZE; +#ifdef OPENSSL_EXTRA + /* Start from last AAD partial tag */ + if(aes->aadLen) { + x[0] = ((word64)aes->aadH[0]) << 32 | aes->aadH[1]; + x[1] = ((word64)aes->aadH[2]) << 32 | aes->aadH[3]; + } +#endif while (blocks--) { XMEMCPY(bigC, c, AES_BLOCK_SIZE); #ifdef LITTLE_ENDIAN_ORDER @@ -3129,9 +5709,12 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz, /* Hash in the lengths in bits of A and C */ { - word64 len[2] ; - len[0] = aSz ; len[1] = cSz; - + word64 len[2]; + len[0] = aSz; len[1] = cSz; +#ifdef OPENSSL_EXTRA + if (aes->aadLen) + len[0] = (word64)aes->aadLen; +#endif /* Lengths are in bytes. Convert to bits. */ len[0] *= 8; len[1] *= 8; @@ -3145,6 +5728,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz, #endif XMEMCPY(s, x, sSz); } +#endif /* !FREESCALE_LTC_AES_GCM */ /* end defined(WORD64_AVAILABLE) && !defined(GCM_WORD32) */ #else /* GCM_WORD32 */ @@ -3152,7 +5736,7 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz, static void GMULT(word32* X, word32* Y) { word32 Z[4] = {0,0,0,0}; - word32 V[4] ; + word32 V[4]; int i, j; V[0] = X[0]; V[1] = X[1]; V[2] = X[2]; V[3] = X[3]; @@ -3197,8 +5781,8 @@ static void GMULT(word32* X, word32* Y) } -static void GHASH(Aes* aes, const byte* a, word32 aSz, - const byte* c, word32 cSz, byte* s, word32 sSz) +void GHASH(Aes* aes, const byte* a, word32 aSz, const byte* c, + word32 cSz, byte* s, word32 sSz) { word32 x[4] = {0,0,0,0}; word32 blocks, partial; @@ -3296,165 +5880,1071 @@ static void GHASH(Aes* aes, const byte* a, word32 aSz, #endif /* end GCM_WORD32 */ +#if !defined(WOLFSSL_XILINX_CRYPT) && !defined(WOLFSSL_AFALG_XILINX_AES) +#ifdef FREESCALE_LTC_AES_GCM int wc_AesGcmEncrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { + status_t status; + word32 keySize; + 
#ifdef STM32_CRYPTO_AES_GCM

/* AES-GCM encrypt using the STM32 crypto peripheral.
 *
 * The payload is split into whole blocks, passed straight through, and a
 * trailing partial block that is staged through a zeroed local buffer, so
 * `out` may alias `in` (this function supports inline encrypt).
 *
 * The AAD is copied into a zero-padded temporary buffer when its length is
 * zero or not a multiple of AES_BLOCK_SIZE. The tag is recomputed in
 * software (GHASH plus one AES block) when sz == 0, when a partial block is
 * present, or when the IV is not GCM_NONCE_MID_SZ bytes.
 *
 * Returns 0 on success, MEMORY_E when the AAD pad buffer cannot be
 * allocated, AES_GCM_AUTH_E when the hardware reports a failure, or the
 * error from wc_AesGetKeySize / wc_Stm32_Aes_Init / the HW mutex lock.
 */
static int wc_AesGcmEncrypt_STM32(Aes* aes, byte* out, const byte* in, word32 sz,
                                  const byte* iv, word32 ivSz,
                                  byte* authTag, word32 authTagSz,
                                  const byte* authIn, word32 authInSz)
{
    int ret;
#ifdef WOLFSSL_STM32_CUBEMX
    CRYP_HandleTypeDef hcryp;
#else
    word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)];
#endif
    word32 keySize;
    int status = HAL_OK;
    word32 blocks = sz / AES_BLOCK_SIZE;
    word32 partial = sz % AES_BLOCK_SIZE;
    byte tag[AES_BLOCK_SIZE];
    byte partialBlock[AES_BLOCK_SIZE];
    byte ctr[AES_BLOCK_SIZE];
    byte* authInPadded = NULL;
    int authPadSz;

    ret = wc_AesGetKeySize(aes, &keySize);
    if (ret != 0)
        return ret;

#ifdef WOLFSSL_STM32_CUBEMX
    ret = wc_Stm32_Aes_Init(aes, &hcryp);
    if (ret != 0)
        return ret;
#endif

    ret = wolfSSL_CryptHwMutexLock();
    if (ret != 0) {
        return ret;
    }

    /* Build the initial counter block from the IV. */
    XMEMSET(ctr, 0, AES_BLOCK_SIZE);
    if (ivSz == GCM_NONCE_MID_SZ) {
        XMEMCPY(ctr, iv, ivSz);
        ctr[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
    }
    /* Hardware requires counter + 1 */
    IncrementGcmCounter(ctr);

    if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
        /* Need to pad the AAD to a full block with zeros. */
        authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
        authInPadded = (byte*)XMALLOC(authPadSz, aes->heap,
            DYNAMIC_TYPE_TMP_BUFFER);
        if (authInPadded == NULL) {
            wolfSSL_CryptHwMutexUnLock();
            return MEMORY_E;
        }
        XMEMSET(authInPadded, 0, authPadSz);
        /* authIn may be NULL when there is no AAD; do not hand a NULL
         * source to the copy routine even for a zero length. */
        if (authInSz > 0) {
            XMEMCPY(authInPadded, authIn, authInSz);
        }
    } else {
        authPadSz = authInSz;
        authInPadded = (byte*)authIn;
    }

#ifdef WOLFSSL_STM32_CUBEMX
    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr;
    hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded;
    hcryp.Init.HeaderSize = authInSz;

#ifdef STM32_CRYPTO_AES_ONLY
    /* Set the CRYP parameters */
    hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_GCM_GMAC;
    hcryp.Init.OperatingMode = CRYP_ALGOMODE_ENCRYPT;
    hcryp.Init.GCMCMACPhase  = CRYP_INIT_PHASE;
    HAL_CRYP_Init(&hcryp);

    /* GCM init phase */
    status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
    if (status == HAL_OK) {
        /* GCM header phase */
        hcryp.Init.GCMCMACPhase  = CRYP_HEADER_PHASE;
        status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
    }
    if (status == HAL_OK) {
        /* GCM payload phase - blocks */
        hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
        if (blocks) {
            status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in,
                (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
        }
    }
    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
        /* GCM payload phase - partial remainder */
        XMEMSET(partialBlock, 0, sizeof(partialBlock));
        /* in/out may be NULL when sz == 0, so only touch them when there
         * are bytes to move. */
        if (partial != 0) {
            XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
        }
        status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
            partialBlock, STM32_HAL_TIMEOUT);
        if (partial != 0) {
            XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
        }
    }
    if (status == HAL_OK) {
        /* GCM final phase */
        hcryp.Init.GCMCMACPhase  = CRYP_FINAL_PHASE;
        status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
    }
#elif defined(STM32_HAL_V2)
    hcryp.Init.Algorithm = CRYP_AES_GCM;
    /* This HAL takes the counter from a word-reversed copy; partialBlock
     * is reused as scratch for it. */
    ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE);
    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock;
    HAL_CRYP_Init(&hcryp);

    /* GCM payload phase - can handle partial blocks */
    status = HAL_CRYP_Encrypt(&hcryp, (uint32_t*)in,
        (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT);
    if (status == HAL_OK) {
        /* Compute the authTag */
        status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag,
            STM32_HAL_TIMEOUT);
    }
#else
    HAL_CRYP_Init(&hcryp);
    if (blocks) {
        /* GCM payload phase - blocks */
        status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, (byte*)in,
            (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
    }
    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
        /* GCM payload phase - partial remainder */
        XMEMSET(partialBlock, 0, sizeof(partialBlock));
        if (partial != 0) {
            XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
        }
        status = HAL_CRYPEx_AESGCM_Encrypt(&hcryp, partialBlock, partial,
            partialBlock, STM32_HAL_TIMEOUT);
        if (partial != 0) {
            XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
        }
    }
    if (status == HAL_OK) {
        /* Compute the authTag */
        status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
    }
#endif

    if (status != HAL_OK)
        ret = AES_GCM_AUTH_E;
    HAL_CRYP_DeInit(&hcryp);

#else /* STD_PERI_LIB */
    ByteReverseWords(keyCopy, (word32*)aes->key, keySize);
    status = CRYP_AES_GCM(MODE_ENCRYPT, (uint8_t*)ctr,
                         (uint8_t*)keyCopy,      keySize * 8,
                         (uint8_t*)in,           sz,
                         (uint8_t*)authInPadded, authInSz,
                         (uint8_t*)out,          tag);
    if (status != SUCCESS)
        ret = AES_GCM_AUTH_E;
#endif /* WOLFSSL_STM32_CUBEMX */

    if (ret == 0) {
        /* return authTag */
        if (authTag) {
            /* STM32 GCM won't compute Auth correctly for partial or
                when IV != 12, so use software here */
            if (sz == 0 || partial != 0 || ivSz != GCM_NONCE_MID_SZ) {
                DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
                GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz);
                wc_AesEncrypt(aes, ctr, tag);
                xorbuf(authTag, tag, authTagSz);
            }
            else {
                XMEMCPY(authTag, tag, authTagSz);
            }
        }
    }

    /* Free memory if not a multiple of AES_BLOCK_SZ */
    if (authInPadded != authIn) {
        XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
    }

    wolfSSL_CryptHwMutexUnLock();

    return ret;
}

#endif /* STM32_CRYPTO_AES_GCM */
using partial handling */ #endif - XMEMSET(ctr, 0, AES_BLOCK_SIZE); - XMEMCPY(ctr, iv, ivSz); - InitGcmCounter(ctr); +#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT) + /* some hardware acceleration can gain performance from doing AES encryption + * of the whole buffer at once */ + if (c != p && blocks > 0) { /* can not handle inline encryption */ + while (blocks--) { + IncrementGcmCounter(ctr); + XMEMCPY(c, ctr, AES_BLOCK_SIZE); + c += AES_BLOCK_SIZE; + } + + /* reset number of blocks and then do encryption */ + blocks = sz / AES_BLOCK_SIZE; + wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks); + xorbuf(out, p, AES_BLOCK_SIZE * blocks); + p += AES_BLOCK_SIZE * blocks; + } + else +#endif /* HAVE_AES_ECB && !WOLFSSL_PIC32MZ_CRYPT */ -#ifdef WOLFSSL_PIC32MZ_CRYPT - if(blocks) - wc_AesCrypt(aes, out, in, blocks*AES_BLOCK_SIZE, - PIC32_ENCRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM ); -#endif while (blocks--) { IncrementGcmCounter(ctr); - #ifndef WOLFSSL_PIC32MZ_CRYPT - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(ctr, key, aes->rounds, scratch); - #else - wc_AesEncrypt(aes, ctr, scratch); - #endif + #if !defined(WOLFSSL_PIC32MZ_CRYPT) + wc_AesEncrypt(aes, ctr, scratch); xorbuf(scratch, p, AES_BLOCK_SIZE); XMEMCPY(c, scratch, AES_BLOCK_SIZE); - #endif + #endif p += AES_BLOCK_SIZE; c += AES_BLOCK_SIZE; } if (partial != 0) { IncrementGcmCounter(ctr); - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(ctr, key, aes->rounds, scratch); - #else - wc_AesEncrypt(aes, ctr, scratch); - #endif + wc_AesEncrypt(aes, ctr, scratch); xorbuf(scratch, p, partial); XMEMCPY(c, scratch, partial); + } + if (authTag) { + GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); + wc_AesEncrypt(aes, initialCounter, scratch); + xorbuf(authTag, scratch, authTagSz); +#ifdef OPENSSL_EXTRA + if (!in && !sz) + /* store AAD size for next call */ + aes->aadLen = authInSz; +#endif + } + + return ret; +} +/* Software AES - GCM Encrypt */ +int wc_AesGcmEncrypt(Aes* aes, byte* out, const 
byte* in, word32 sz, + const byte* iv, word32 ivSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + /* argument checks */ + if (aes == NULL || authTagSz > AES_BLOCK_SIZE || ivSz == 0) { + return BAD_FUNC_ARG; } - GHASH(aes, authIn, authInSz, out, sz, authTag, authTagSz); - InitGcmCounter(ctr); - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(ctr, key, aes->rounds, scratch); - #else - wc_AesEncrypt(aes, ctr, scratch); + if (authTagSz < WOLFSSL_MIN_AUTH_TAG_SZ) { + WOLFSSL_MSG("GcmEncrypt authTagSz too small error"); + return BAD_FUNC_ARG; + } + +#ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesGcmEncrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + /* if async and byte count above threshold */ + /* only 12-byte IV is supported in HW */ + if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && + sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) { + #if defined(HAVE_CAVIUM) + #ifdef HAVE_CAVIUM_V + if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */ + return NitroxAesGcmEncrypt(aes, out, in, sz, + (const byte*)aes->devKey, aes->keylen, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + } + #endif + #elif defined(HAVE_INTEL_QA) + return IntelQaSymAesGcmEncrypt(&aes->asyncDev, out, in, sz, + (const byte*)aes->devKey, aes->keylen, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_ENCRYPT)) { + WC_ASYNC_TEST* testDev = &aes->asyncDev.test; + testDev->aes.aes = aes; + testDev->aes.out = out; + testDev->aes.in = in; + testDev->aes.sz = sz; + testDev->aes.iv = iv; + testDev->aes.ivSz = ivSz; + testDev->aes.authTag = authTag; + testDev->aes.authTagSz = authTagSz; + testDev->aes.authIn = authIn; + 
testDev->aes.authInSz = authInSz; + return WC_PENDING_E; + } #endif - xorbuf(authTag, scratch, authTagSz); + } +#endif /* WOLFSSL_ASYNC_CRYPT */ - return 0; +#ifdef STM32_CRYPTO_AES_GCM + /* The STM standard peripheral library API's doesn't support partial blocks */ + #ifdef STD_PERI_LIB + if (partial == 0) + #endif + { + return wc_AesGcmEncrypt_STM32( + aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + } +#endif /* STM32_CRYPTO_AES_GCM */ + +#ifdef WOLFSSL_AESNI + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_encrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz, + authTagSz, (const byte*)aes->key, aes->rounds); + return 0; + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_encrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz, + authTagSz, (const byte*)aes->key, aes->rounds); + return 0; + } + else + #endif + if (haveAESNI) { + AES_GCM_encrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz, + authTagSz, (const byte*)aes->key, aes->rounds); + return 0; + } + else +#endif + { + return AES_GCM_encrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz, + authIn, authInSz); + } } +#endif + +/* AES GCM Decrypt */ +#if defined(HAVE_AES_DECRYPT) || defined(HAVE_AESGCM_DECRYPT) +#ifdef FREESCALE_LTC_AES_GCM int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, const byte* iv, word32 ivSz, const byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { + int ret; + word32 keySize; + status_t status; + + /* argument checks */ + /* If the sz is non-zero, both in and out must be set. If sz is 0, + * in and out are don't cares, as this is is the GMAC case. 
#ifdef STM32_CRYPTO_AES_GCM
/* AES-GCM decrypt and tag check using the STM32 crypto peripheral.
 *
 * The payload is split into whole blocks, passed straight through, and a
 * trailing partial block that is staged through a zeroed local buffer
 * (this function supports inline decrypt). The AAD is copied into a
 * zero-padded temporary buffer when its length is zero or not a multiple of
 * AES_BLOCK_SIZE.
 *
 * When sz == 0 or the IV is not GCM_NONCE_MID_SZ bytes the expected tag is
 * recomputed in software; otherwise the tag produced by the hardware is
 * used. The comparison against authTag is constant-time.
 *
 * Returns 0 on success, AES_GCM_AUTH_E on a hardware failure or tag
 * mismatch, MEMORY_E when the AAD pad buffer cannot be allocated, or the
 * error from wc_AesGetKeySize / wc_Stm32_Aes_Init / the HW mutex lock.
 * `out` has already been written when AES_GCM_AUTH_E is returned.
 */
static int wc_AesGcmDecrypt_STM32(Aes* aes, byte* out,
                                  const byte* in, word32 sz,
                                  const byte* iv, word32 ivSz,
                                  const byte* authTag, word32 authTagSz,
                                  const byte* authIn, word32 authInSz)
{
    int ret;
#ifdef WOLFSSL_STM32_CUBEMX
    CRYP_HandleTypeDef hcryp;
#else
    word32 keyCopy[AES_256_KEY_SIZE/sizeof(word32)];
#endif
    word32 keySize;
    int status = HAL_OK;
    word32 blocks = sz / AES_BLOCK_SIZE;
    word32 partial = sz % AES_BLOCK_SIZE;
    byte tag[AES_BLOCK_SIZE];
    byte partialBlock[AES_BLOCK_SIZE];
    byte ctr[AES_BLOCK_SIZE];
    byte* authInPadded = NULL;
    int authPadSz;

    ret = wc_AesGetKeySize(aes, &keySize);
    if (ret != 0)
        return ret;

#ifdef WOLFSSL_STM32_CUBEMX
    ret = wc_Stm32_Aes_Init(aes, &hcryp);
    if (ret != 0)
        return ret;
#endif

    ret = wolfSSL_CryptHwMutexLock();
    if (ret != 0) {
        return ret;
    }

    /* Build the initial counter block from the IV. */
    XMEMSET(ctr, 0, AES_BLOCK_SIZE);
    if (ivSz == GCM_NONCE_MID_SZ) {
        XMEMCPY(ctr, iv, ivSz);
        ctr[AES_BLOCK_SIZE - 1] = 1;
    }
    else {
        GHASH(aes, NULL, 0, iv, ivSz, ctr, AES_BLOCK_SIZE);
    }
    /* Hardware requires counter + 1 */
    IncrementGcmCounter(ctr);

    if (authInSz == 0 || (authInSz % AES_BLOCK_SIZE) != 0) {
        /* Need to pad the AAD to a full block with zeros. */
        authPadSz = ((authInSz / AES_BLOCK_SIZE) + 1) * AES_BLOCK_SIZE;
        authInPadded = (byte*)XMALLOC(authPadSz, aes->heap,
            DYNAMIC_TYPE_TMP_BUFFER);
        if (authInPadded == NULL) {
            wolfSSL_CryptHwMutexUnLock();
            return MEMORY_E;
        }
        XMEMSET(authInPadded, 0, authPadSz);
        /* authIn may be NULL when there is no AAD; do not hand a NULL
         * source to the copy routine even for a zero length. */
        if (authInSz > 0) {
            XMEMCPY(authInPadded, authIn, authInSz);
        }
    } else {
        authPadSz = authInSz;
        authInPadded = (byte*)authIn;
    }

#ifdef WOLFSSL_STM32_CUBEMX
    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)ctr;
    hcryp.Init.Header = (STM_CRYPT_TYPE*)authInPadded;
    hcryp.Init.HeaderSize = authInSz;

#ifdef STM32_CRYPTO_AES_ONLY
    /* Set the CRYP parameters */
    hcryp.Init.ChainingMode  = CRYP_CHAINMODE_AES_GCM_GMAC;
    hcryp.Init.OperatingMode = CRYP_ALGOMODE_DECRYPT;
    hcryp.Init.GCMCMACPhase  = CRYP_INIT_PHASE;
    HAL_CRYP_Init(&hcryp);

    /* GCM init phase */
    status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
    if (status == HAL_OK) {
        /* GCM header phase */
        hcryp.Init.GCMCMACPhase  = CRYP_HEADER_PHASE;
        status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, 0, NULL, STM32_HAL_TIMEOUT);
    }
    if (status == HAL_OK) {
        /* GCM payload phase - blocks */
        hcryp.Init.GCMCMACPhase  = CRYP_PAYLOAD_PHASE;
        if (blocks) {
            status = HAL_CRYPEx_AES_Auth(&hcryp, (byte*)in,
                (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
        }
    }
    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
        /* GCM payload phase - partial remainder */
        XMEMSET(partialBlock, 0, sizeof(partialBlock));
        /* in/out may be NULL when sz == 0, so only touch them when there
         * are bytes to move. */
        if (partial != 0) {
            XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
        }
        status = HAL_CRYPEx_AES_Auth(&hcryp, partialBlock, partial,
            partialBlock, STM32_HAL_TIMEOUT);
        if (partial != 0) {
            XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
        }
    }
    if (status == HAL_OK) {
        /* GCM final phase */
        hcryp.Init.GCMCMACPhase  = CRYP_FINAL_PHASE;
        status = HAL_CRYPEx_AES_Auth(&hcryp, NULL, sz, tag, STM32_HAL_TIMEOUT);
    }
#elif defined(STM32_HAL_V2)
    hcryp.Init.Algorithm = CRYP_AES_GCM;
    /* This HAL takes the counter from a word-reversed copy; partialBlock
     * is reused as scratch for it. */
    ByteReverseWords((word32*)partialBlock, (word32*)ctr, AES_BLOCK_SIZE);
    hcryp.Init.pInitVect = (STM_CRYPT_TYPE*)partialBlock;
    HAL_CRYP_Init(&hcryp);

    /* GCM payload phase - can handle partial blocks */
    status = HAL_CRYP_Decrypt(&hcryp, (uint32_t*)in,
        (blocks * AES_BLOCK_SIZE) + partial, (uint32_t*)out, STM32_HAL_TIMEOUT);
    if (status == HAL_OK) {
        /* Compute the authTag */
        status = HAL_CRYPEx_AESGCM_GenerateAuthTAG(&hcryp, (uint32_t*)tag,
            STM32_HAL_TIMEOUT);
    }
#else
    HAL_CRYP_Init(&hcryp);
    if (blocks) {
        /* GCM payload phase - blocks */
        status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, (byte*)in,
            (blocks * AES_BLOCK_SIZE), out, STM32_HAL_TIMEOUT);
    }
    if (status == HAL_OK && (partial != 0 || blocks == 0)) {
        /* GCM payload phase - partial remainder */
        XMEMSET(partialBlock, 0, sizeof(partialBlock));
        if (partial != 0) {
            XMEMCPY(partialBlock, in + (blocks * AES_BLOCK_SIZE), partial);
        }
        status = HAL_CRYPEx_AESGCM_Decrypt(&hcryp, partialBlock, partial,
            partialBlock, STM32_HAL_TIMEOUT);
        if (partial != 0) {
            XMEMCPY(out + (blocks * AES_BLOCK_SIZE), partialBlock, partial);
        }
    }
    if (status == HAL_OK) {
        /* Compute the authTag */
        status = HAL_CRYPEx_AESGCM_Finish(&hcryp, sz, tag, STM32_HAL_TIMEOUT);
    }
#endif

    if (status != HAL_OK)
        ret = AES_GCM_AUTH_E;

    HAL_CRYP_DeInit(&hcryp);

#else /* STD_PERI_LIB */
    /* Use the size reported by wc_AesGetKeySize, matching the key-bit count
     * passed below and the encrypt-side twin of this function. */
    ByteReverseWords(keyCopy, (word32*)aes->key, keySize);

    /* Input size and auth size need to be the actual sizes, even though
     * they are not block aligned, because this length (in bits) is used
     * in the final GHASH. */
    status = CRYP_AES_GCM(MODE_DECRYPT, (uint8_t*)ctr,
                         (uint8_t*)keyCopy,      keySize * 8,
                         (uint8_t*)in,           sz,
                         (uint8_t*)authInPadded, authInSz,
                         (uint8_t*)out,          tag);
    if (status != SUCCESS)
        ret = AES_GCM_AUTH_E;
#endif /* WOLFSSL_STM32_CUBEMX */

    /* STM32 GCM hardware only supports IV of 12 bytes, so use software for auth */
    /* NOTE(review): this GHASH reads `in` after the hardware has written
     * `out`. If a caller decrypts in place (in == out) with an IV that is
     * not 12 bytes, `in` presumably holds plaintext by now and the tag
     * would not match - confirm against callers. */
    if (sz == 0 || ivSz != GCM_NONCE_MID_SZ) {
        DecrementGcmCounter(ctr); /* hardware requires +1, so subtract it */
        GHASH(aes, authIn, authInSz, in, sz, tag, sizeof(tag));
        wc_AesEncrypt(aes, ctr, partialBlock);
        xorbuf(tag, partialBlock, sizeof(tag));
    }

    if (ConstantCompare(authTag, tag, authTagSz) != 0) {
        ret = AES_GCM_AUTH_E;
    }

    /* Free memory if not a multiple of AES_BLOCK_SZ */
    if (authInPadded != authIn) {
        XFREE(authInPadded, aes->heap, DYNAMIC_TYPE_TMP_BUFFER);
    }

    wolfSSL_CryptHwMutexUnLock();

    return ret;
}

#endif /* STM32_CRYPTO_AES_GCM */
initialCounter[AES_BLOCK_SIZE]; + byte *ctr; + byte scratch[AES_BLOCK_SIZE]; + byte Tprime[AES_BLOCK_SIZE]; + byte EKY0[AES_BLOCK_SIZE]; +#ifdef OPENSSL_EXTRA + word32 aadTemp; +#endif + ctr = counter; + XMEMSET(initialCounter, 0, AES_BLOCK_SIZE); + if (ivSz == GCM_NONCE_MID_SZ) { + XMEMCPY(initialCounter, iv, ivSz); + initialCounter[AES_BLOCK_SIZE - 1] = 1; + } + else { +#ifdef OPENSSL_EXTRA + aadTemp = aes->aadLen; + aes->aadLen = 0; +#endif + GHASH(aes, NULL, 0, iv, ivSz, initialCounter, AES_BLOCK_SIZE); +#ifdef OPENSSL_EXTRA + aes->aadLen = aadTemp; +#endif + } + XMEMCPY(ctr, initialCounter, AES_BLOCK_SIZE); + + /* Calc the authTag again using the received auth data and the cipher text */ + GHASH(aes, authIn, authInSz, in, sz, Tprime, sizeof(Tprime)); + wc_AesEncrypt(aes, ctr, EKY0); + xorbuf(Tprime, EKY0, sizeof(Tprime)); + +#ifdef OPENSSL_EXTRA + if (!out) { + /* authenticated, non-confidential data */ + /* store AAD size for next call */ + aes->aadLen = authInSz; + } +#endif + if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { + return AES_GCM_AUTH_E; } -#ifdef WOLFSSL_PIC32MZ_CRYPT - if(blocks) - wc_AesCrypt(aes, out, in, blocks*AES_BLOCK_SIZE, - PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM ); +#if defined(WOLFSSL_PIC32MZ_CRYPT) + if (blocks) { + /* use initial IV for HW, but don't use it below */ + XMEMCPY(aes->reg, ctr, AES_BLOCK_SIZE); + + ret = wc_Pic32AesCrypt( + aes->key, aes->keylen, aes->reg, AES_BLOCK_SIZE, + out, in, (blocks * AES_BLOCK_SIZE), + PIC32_DECRYPTION, PIC32_ALGO_AES, PIC32_CRYPTOALGO_AES_GCM); + if (ret != 0) + return ret; + } + /* process remainder using partial handling */ #endif +#if defined(HAVE_AES_ECB) && !defined(WOLFSSL_PIC32MZ_CRYPT) + /* some hardware acceleration can gain performance from doing AES encryption + * of the whole buffer at once */ + if (c != p && blocks > 0) { /* can not handle inline decryption */ + while (blocks--) { + IncrementGcmCounter(ctr); + XMEMCPY(p, ctr, AES_BLOCK_SIZE); + p += 
AES_BLOCK_SIZE; + } + + /* reset number of blocks and then do encryption */ + blocks = sz / AES_BLOCK_SIZE; + + wc_AesEcbEncrypt(aes, out, out, AES_BLOCK_SIZE * blocks); + xorbuf(out, c, AES_BLOCK_SIZE * blocks); + c += AES_BLOCK_SIZE * blocks; + } + else +#endif /* HAVE_AES_ECB && !PIC32MZ */ while (blocks--) { IncrementGcmCounter(ctr); - #ifndef WOLFSSL_PIC32MZ_CRYPT - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(ctr, key, aes->rounds, scratch); - #else - wc_AesEncrypt(aes, ctr, scratch); - #endif + #if !defined(WOLFSSL_PIC32MZ_CRYPT) + wc_AesEncrypt(aes, ctr, scratch); xorbuf(scratch, c, AES_BLOCK_SIZE); XMEMCPY(p, scratch, AES_BLOCK_SIZE); - #endif + #endif p += AES_BLOCK_SIZE; c += AES_BLOCK_SIZE; } + if (partial != 0) { IncrementGcmCounter(ctr); - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(ctr, key, aes->rounds, scratch); - #else - wc_AesEncrypt(aes, ctr, scratch); - #endif + wc_AesEncrypt(aes, ctr, scratch); xorbuf(scratch, c, partial); XMEMCPY(p, scratch, partial); } - return 0; + + return ret; +} + +/* Software AES - GCM Decrypt */ +int wc_AesGcmDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + const byte* iv, word32 ivSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ +#ifdef WOLFSSL_AESNI + int res = AES_GCM_AUTH_E; +#endif + + /* argument checks */ + /* If the sz is non-zero, both in and out must be set. If sz is 0, + * in and out are don't cares, as this is is the GMAC case. 
*/ + if (aes == NULL || iv == NULL || (sz != 0 && (in == NULL || out == NULL)) || + authTag == NULL || authTagSz > AES_BLOCK_SIZE || authTagSz == 0 || + ivSz == 0) { + + return BAD_FUNC_ARG; + } + +#ifdef WOLF_CRYPTO_CB + if (aes->devId != INVALID_DEVID) { + int ret = wc_CryptoCb_AesGcmDecrypt(aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + if (ret != CRYPTOCB_UNAVAILABLE) + return ret; + /* fall-through when unavailable */ + } +#endif + +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + /* if async and byte count above threshold */ + /* only 12-byte IV is supported in HW */ + if (aes->asyncDev.marker == WOLFSSL_ASYNC_MARKER_AES && + sz >= WC_ASYNC_THRESH_AES_GCM && ivSz == GCM_NONCE_MID_SZ) { + #if defined(HAVE_CAVIUM) + #ifdef HAVE_CAVIUM_V + if (authInSz == 20) { /* Nitrox V GCM is only working with 20 byte AAD */ + return NitroxAesGcmDecrypt(aes, out, in, sz, + (const byte*)aes->devKey, aes->keylen, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + } + #endif + #elif defined(HAVE_INTEL_QA) + return IntelQaSymAesGcmDecrypt(&aes->asyncDev, out, in, sz, + (const byte*)aes->devKey, aes->keylen, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + #else /* WOLFSSL_ASYNC_CRYPT_TEST */ + if (wc_AsyncTestInit(&aes->asyncDev, ASYNC_TEST_AES_GCM_DECRYPT)) { + WC_ASYNC_TEST* testDev = &aes->asyncDev.test; + testDev->aes.aes = aes; + testDev->aes.out = out; + testDev->aes.in = in; + testDev->aes.sz = sz; + testDev->aes.iv = iv; + testDev->aes.ivSz = ivSz; + testDev->aes.authTag = (byte*)authTag; + testDev->aes.authTagSz = authTagSz; + testDev->aes.authIn = authIn; + testDev->aes.authInSz = authInSz; + return WC_PENDING_E; + } + #endif + } +#endif /* WOLFSSL_ASYNC_CRYPT */ + +#ifdef STM32_CRYPTO_AES_GCM + /* The STM standard peripheral library API's doesn't support partial blocks */ + #ifdef STD_PERI_LIB + if (partial == 0) + #endif + { + return wc_AesGcmDecrypt_STM32( + aes, out, in, sz, iv, ivSz, + authTag, authTagSz, authIn, 
authInSz); + } +#endif /* STM32_CRYPTO_AES_GCM */ + +#ifdef WOLFSSL_AESNI + #ifdef HAVE_INTEL_AVX2 + if (IS_INTEL_AVX2(intel_flags)) { + AES_GCM_decrypt_avx2(in, out, authIn, iv, authTag, sz, authInSz, ivSz, + authTagSz, (byte*)aes->key, aes->rounds, &res); + if (res == 0) + return AES_GCM_AUTH_E; + return 0; + } + else + #endif + #ifdef HAVE_INTEL_AVX1 + if (IS_INTEL_AVX1(intel_flags)) { + AES_GCM_decrypt_avx1(in, out, authIn, iv, authTag, sz, authInSz, ivSz, + authTagSz, (byte*)aes->key, aes->rounds, &res); + if (res == 0) + return AES_GCM_AUTH_E; + return 0; + } + else + #endif + if (haveAESNI) { + AES_GCM_decrypt(in, out, authIn, iv, authTag, sz, authInSz, ivSz, + authTagSz, (byte*)aes->key, aes->rounds, &res); + if (res == 0) + return AES_GCM_AUTH_E; + return 0; + } + else +#endif + { + return AES_GCM_decrypt_C(aes, out, in, sz, iv, ivSz, authTag, authTagSz, + authIn, authInSz); + } +} +#endif +#endif /* HAVE_AES_DECRYPT || HAVE_AESGCM_DECRYPT */ +#endif /* WOLFSSL_XILINX_CRYPT */ +#endif /* end of block for AESGCM implementation selection */ + + +/* Common to all, abstract functions that build off of lower level AESGCM + * functions */ +#ifndef WC_NO_RNG + +int wc_AesGcmSetExtIV(Aes* aes, const byte* iv, word32 ivSz) +{ + int ret = 0; + + if (aes == NULL || iv == NULL || + (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ && + ivSz != GCM_NONCE_MAX_SZ)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + XMEMCPY((byte*)aes->reg, iv, ivSz); + + /* If the IV is 96, allow for a 2^64 invocation counter. + * For any other size for the nonce, limit the invocation + * counter to 32-bits. (SP 800-38D 8.3) */ + aes->invokeCtr[0] = 0; + aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 
0 : 0xFFFFFFFF; + aes->nonceSz = ivSz; + } + + return ret; +} + + +int wc_AesGcmSetIV(Aes* aes, word32 ivSz, + const byte* ivFixed, word32 ivFixedSz, + WC_RNG* rng) +{ + int ret = 0; + + if (aes == NULL || rng == NULL || + (ivSz != GCM_NONCE_MIN_SZ && ivSz != GCM_NONCE_MID_SZ && + ivSz != GCM_NONCE_MAX_SZ) || + (ivFixed == NULL && ivFixedSz != 0) || + (ivFixed != NULL && ivFixedSz != AES_IV_FIXED_SZ)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + byte* iv = (byte*)aes->reg; + + if (ivFixedSz) + XMEMCPY(iv, ivFixed, ivFixedSz); + + ret = wc_RNG_GenerateBlock(rng, iv + ivFixedSz, ivSz - ivFixedSz); + } + + if (ret == 0) { + /* If the IV is 96, allow for a 2^64 invocation counter. + * For any other size for the nonce, limit the invocation + * counter to 32-bits. (SP 800-38D 8.3) */ + aes->invokeCtr[0] = 0; + aes->invokeCtr[1] = (ivSz == GCM_NONCE_MID_SZ) ? 0 : 0xFFFFFFFF; + aes->nonceSz = ivSz; + } + + return ret; +} + + +int wc_AesGcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz, + byte* ivOut, word32 ivOutSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + int ret = 0; + + if (aes == NULL || (sz != 0 && (in == NULL || out == NULL)) || + ivOut == NULL || ivOutSz != aes->nonceSz || + (authIn == NULL && authInSz != 0)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + aes->invokeCtr[0]++; + if (aes->invokeCtr[0] == 0) { + aes->invokeCtr[1]++; + if (aes->invokeCtr[1] == 0) + ret = AES_GCM_OVERFLOW_E; + } + } + + if (ret == 0) { + XMEMCPY(ivOut, aes->reg, ivOutSz); + ret = wc_AesGcmEncrypt(aes, out, in, sz, + (byte*)aes->reg, ivOutSz, + authTag, authTagSz, + authIn, authInSz); + if (ret == 0) + IncCtr((byte*)aes->reg, ivOutSz); + } + + return ret; +} + +int wc_Gmac(const byte* key, word32 keySz, byte* iv, word32 ivSz, + const byte* authIn, word32 authInSz, + byte* authTag, word32 authTagSz, WC_RNG* rng) +{ + Aes aes; + int ret; + + if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) || + authTag == NULL || 
authTagSz == 0 || rng == NULL) { + + return BAD_FUNC_ARG; + } + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesGcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesGcmSetIV(&aes, ivSz, NULL, 0, rng); + if (ret == 0) + ret = wc_AesGcmEncrypt_ex(&aes, NULL, NULL, 0, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + wc_AesFree(&aes); + } + ForceZero(&aes, sizeof(aes)); + + return ret; } +int wc_GmacVerify(const byte* key, word32 keySz, + const byte* iv, word32 ivSz, + const byte* authIn, word32 authInSz, + const byte* authTag, word32 authTagSz) +{ + int ret; +#ifndef NO_AES_DECRYPT + Aes aes; + + if (key == NULL || iv == NULL || (authIn == NULL && authInSz != 0) || + authTag == NULL || authTagSz == 0 || authTagSz > AES_BLOCK_SIZE) { + + return BAD_FUNC_ARG; + } + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret == 0) { + ret = wc_AesGcmSetKey(&aes, key, keySz); + if (ret == 0) + ret = wc_AesGcmDecrypt(&aes, NULL, NULL, 0, iv, ivSz, + authTag, authTagSz, authIn, authInSz); + wc_AesFree(&aes); + } + ForceZero(&aes, sizeof(aes)); +#else + (void)key; + (void)keySz; + (void)iv; + (void)ivSz; + (void)authIn; + (void)authInSz; + (void)authTag; + (void)authTagSz; + ret = NOT_COMPILED_IN; +#endif + return ret; +} + +#endif /* WC_NO_RNG */ WOLFSSL_API int wc_GmacSetKey(Gmac* gmac, const byte* key, word32 len) { + if (gmac == NULL || key == NULL) { + return BAD_FUNC_ARG; + } return wc_AesGcmSetKey(&gmac->aes, key, len); } @@ -3472,69 +6962,114 @@ WOLFSSL_API int wc_GmacUpdate(Gmac* gmac, const byte* iv, word32 ivSz, #ifdef HAVE_AESCCM -#ifdef STM32F2_CRYPTO - #error "STM32F2 crypto doesn't currently support AES-CCM mode" +int wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz) +{ + if (!((keySz == 16) || (keySz == 24) || (keySz == 32))) + return BAD_FUNC_ARG; + + return wc_AesSetKey(aes, key, keySz, NULL, AES_ENCRYPTION); +} + +#ifdef WOLFSSL_ARMASM + /* implementation located in wolfcrypt/src/port/arm/armv8-aes.c */ #elif 
defined(HAVE_COLDFIRE_SEC) #error "Coldfire SEC doesn't currently support AES-CCM mode" -#elif defined(WOLFSSL_PIC32MZ_CRYPT) - #error "PIC32MZ doesn't currently support AES-CCM mode" +#elif defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam_aes.c */ -#endif +#elif defined(FREESCALE_LTC) -void wc_AesCcmSetKey(Aes* aes, const byte* key, word32 keySz) +/* return 0 on success */ +int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { - byte nonce[AES_BLOCK_SIZE]; + byte *key; + uint32_t keySize; + status_t status; - if (!((keySz == 16) || (keySz == 24) || (keySz == 32))) - return; + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + key = (byte*)aes->key; + + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; + } - XMEMSET(nonce, 0, sizeof(nonce)); - wc_AesSetKey(aes, key, keySz, nonce, AES_ENCRYPTION); + status = LTC_AES_EncryptTagCcm(LTC_BASE, in, out, inSz, + nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz); + + return (kStatus_Success == status) ? 
0 : BAD_FUNC_ARG; } +#ifdef HAVE_AES_DECRYPT +int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + const byte* nonce, word32 nonceSz, + const byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) +{ + byte *key; + uint32_t keySize; + status_t status; + + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13) + return BAD_FUNC_ARG; + + key = (byte*)aes->key; + status = wc_AesGetKeySize(aes, &keySize); + if (status != 0) { + return status; + } + + status = LTC_AES_DecryptTagCcm(LTC_BASE, in, out, inSz, + nonce, nonceSz, authIn, authInSz, key, keySize, authTag, authTagSz); + + if (status == kStatus_Success) { + return 0; + } + else { + XMEMSET(out, 0, inSz); + return AES_CCM_AUTH_E; + } +} +#endif /* HAVE_AES_DECRYPT */ + +#else + +/* Software CCM */ static void roll_x(Aes* aes, const byte* in, word32 inSz, byte* out) { - #ifdef FREESCALE_MMCAU - byte* key = (byte*)aes->key; - #endif - /* process the bulk of the data */ while (inSz >= AES_BLOCK_SIZE) { xorbuf(out, in, AES_BLOCK_SIZE); in += AES_BLOCK_SIZE; inSz -= AES_BLOCK_SIZE; - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(out, key, aes->rounds, out); - #else - wc_AesEncrypt(aes, out, out); - #endif + wc_AesEncrypt(aes, out, out); } /* process remainder of the data */ if (inSz > 0) { xorbuf(out, in, inSz); - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(out, key, aes->rounds, out); - #else - wc_AesEncrypt(aes, out, out); - #endif + wc_AesEncrypt(aes, out, out); } } - static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out) { word32 authLenSz; word32 remainder; - #ifdef FREESCALE_MMCAU - byte* key = (byte*)aes->key; - #endif - /* encode the length in */ if (inSz <= 0xFEFF) { authLenSz = 2; @@ -3568,18 +7103,14 @@ static void roll_auth(Aes* aes, const byte* in, word32 inSz, byte* out) xorbuf(out + authLenSz, in, inSz); inSz = 0; } - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(out, 
key, aes->rounds, out); - #else - wc_AesEncrypt(aes, out, out); - #endif + wc_AesEncrypt(aes, out, out); if (inSz > 0) roll_x(aes, in, inSz, out); } -static INLINE void AesCcmCtrInc(byte* B, word32 lenSz) +static WC_INLINE void AesCcmCtrInc(byte* B, word32 lenSz) { word32 i; @@ -3588,34 +7119,85 @@ static INLINE void AesCcmCtrInc(byte* B, word32 lenSz) } } +#ifdef WOLFSSL_AESNI +static WC_INLINE void AesCcmCtrIncSet4(byte* B, word32 lenSz) +{ + word32 i; + + /* B+1 = B */ + XMEMCPY(B + AES_BLOCK_SIZE * 1, B, AES_BLOCK_SIZE); + /* B+2,B+3 = B,B+1 */ + XMEMCPY(B + AES_BLOCK_SIZE * 2, B, AES_BLOCK_SIZE * 2); + + for (i = 0; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 1 - 1 - i] != 0) break; + } + B[AES_BLOCK_SIZE * 2 - 1] += 2; + if (B[AES_BLOCK_SIZE * 2 - 1] < 2) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 2 - 1 - i] != 0) break; + } + } + B[AES_BLOCK_SIZE * 3 - 1] += 3; + if (B[AES_BLOCK_SIZE * 3 - 1] < 3) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE * 3 - 1 - i] != 0) break; + } + } +} + +static WC_INLINE void AesCcmCtrInc4(byte* B, word32 lenSz) +{ + word32 i; -void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, + B[AES_BLOCK_SIZE - 1] += 4; + if (B[AES_BLOCK_SIZE - 1] < 4) { + for (i = 1; i < lenSz; i++) { + if (++B[AES_BLOCK_SIZE - 1 - i] != 0) break; + } + } +} +#endif + +/* Software AES - CCM Encrypt */ +/* return 0 on success */ +int wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, const byte* nonce, word32 nonceSz, byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { +#ifndef WOLFSSL_AESNI byte A[AES_BLOCK_SIZE]; byte B[AES_BLOCK_SIZE]; +#else + ALIGN128 byte A[AES_BLOCK_SIZE * 4]; + ALIGN128 byte B[AES_BLOCK_SIZE * 4]; +#endif byte lenSz; word32 i; + byte mask = 0xFF; + const word32 wordSz = (word32)sizeof(word32); - #ifdef FREESCALE_MMCAU - byte* key = (byte*)aes->key; - #endif + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == 
NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13 || + authTagSz > AES_BLOCK_SIZE) + return BAD_FUNC_ARG; + XMEMSET(A, 0, sizeof(A)); XMEMCPY(B+1, nonce, nonceSz); lenSz = AES_BLOCK_SIZE - 1 - (byte)nonceSz; B[0] = (authInSz > 0 ? 64 : 0) + (8 * (((byte)authTagSz - 2) / 2)) + (lenSz - 1); - for (i = 0; i < lenSz; i++) - B[AES_BLOCK_SIZE - 1 - i] = (inSz >> (8 * i)) & 0xFF; + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, A); - #else - wc_AesEncrypt(aes, B, A); - #endif if (authInSz > 0) roll_auth(aes, authIn, authInSz, A); if (inSz > 0) @@ -3625,20 +7207,32 @@ void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, B[0] = lenSz - 1; for (i = 0; i < lenSz; i++) B[AES_BLOCK_SIZE - 1 - i] = 0; - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, A); - #else - wc_AesEncrypt(aes, B, A); - #endif + wc_AesEncrypt(aes, B, A); xorbuf(authTag, A, authTagSz); B[15] = 1; +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + while (inSz >= AES_BLOCK_SIZE * 4) { + AesCcmCtrIncSet4(B, lenSz); + + AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, + aes->rounds); + xorbuf(A, in, AES_BLOCK_SIZE * 4); + XMEMCPY(out, A, AES_BLOCK_SIZE * 4); + + inSz -= AES_BLOCK_SIZE * 4; + in += AES_BLOCK_SIZE * 4; + out += AES_BLOCK_SIZE * 4; + + if (inSz < AES_BLOCK_SIZE * 4) { + AesCcmCtrInc4(B, lenSz); + } + } + } +#endif while (inSz >= AES_BLOCK_SIZE) { - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, A); - #else - wc_AesEncrypt(aes, B, A); - #endif + wc_AesEncrypt(aes, B, A); xorbuf(A, in, AES_BLOCK_SIZE); XMEMCPY(out, A, AES_BLOCK_SIZE); @@ -3648,35 +7242,43 @@ void wc_AesCcmEncrypt(Aes* aes, byte* out, const byte* in, word32 inSz, out += AES_BLOCK_SIZE; } if (inSz > 0) { - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, A); - 
#else - wc_AesEncrypt(aes, B, A); - #endif + wc_AesEncrypt(aes, B, A); xorbuf(A, in, inSz); XMEMCPY(out, A, inSz); } ForceZero(A, AES_BLOCK_SIZE); ForceZero(B, AES_BLOCK_SIZE); -} + return 0; +} +#ifdef HAVE_AES_DECRYPT +/* Software AES - CCM Decrypt */ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, const byte* nonce, word32 nonceSz, const byte* authTag, word32 authTagSz, const byte* authIn, word32 authInSz) { +#ifndef WOLFSSL_AESNI byte A[AES_BLOCK_SIZE]; byte B[AES_BLOCK_SIZE]; +#else + ALIGN128 byte B[AES_BLOCK_SIZE * 4]; + ALIGN128 byte A[AES_BLOCK_SIZE * 4]; +#endif byte* o; byte lenSz; word32 i, oSz; int result = 0; + byte mask = 0xFF; + const word32 wordSz = (word32)sizeof(word32); - #ifdef FREESCALE_MMCAU - byte* key = (byte*)aes->key; - #endif + /* sanity check on arguments */ + if (aes == NULL || out == NULL || in == NULL || nonce == NULL + || authTag == NULL || nonceSz < 7 || nonceSz > 13 || + authTagSz > AES_BLOCK_SIZE) + return BAD_FUNC_ARG; o = out; oSz = inSz; @@ -3688,12 +7290,28 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, B[AES_BLOCK_SIZE - 1 - i] = 0; B[15] = 1; +#ifdef WOLFSSL_AESNI + if (haveAESNI && aes->use_aesni) { + while (oSz >= AES_BLOCK_SIZE * 4) { + AesCcmCtrIncSet4(B, lenSz); + + AES_ECB_encrypt(B, A, AES_BLOCK_SIZE * 4, (byte*)aes->key, + aes->rounds); + xorbuf(A, in, AES_BLOCK_SIZE * 4); + XMEMCPY(o, A, AES_BLOCK_SIZE * 4); + + oSz -= AES_BLOCK_SIZE * 4; + in += AES_BLOCK_SIZE * 4; + o += AES_BLOCK_SIZE * 4; + + if (oSz < AES_BLOCK_SIZE * 4) { + AesCcmCtrInc4(B, lenSz); + } + } + } +#endif while (oSz >= AES_BLOCK_SIZE) { - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, A); - #else - wc_AesEncrypt(aes, B, A); - #endif + wc_AesEncrypt(aes, B, A); xorbuf(A, in, AES_BLOCK_SIZE); XMEMCPY(o, A, AES_BLOCK_SIZE); @@ -3703,22 +7321,14 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, o += AES_BLOCK_SIZE; } if (inSz > 0) { - #ifdef FREESCALE_MMCAU - 
cau_aes_encrypt(B, key, aes->rounds, A); - #else - wc_AesEncrypt(aes, B, A); - #endif + wc_AesEncrypt(aes, B, A); xorbuf(A, in, oSz); XMEMCPY(o, A, oSz); } for (i = 0; i < lenSz; i++) B[AES_BLOCK_SIZE - 1 - i] = 0; - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, A); - #else - wc_AesEncrypt(aes, B, A); - #endif + wc_AesEncrypt(aes, B, A); o = out; oSz = inSz; @@ -3726,14 +7336,14 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, B[0] = (authInSz > 0 ? 64 : 0) + (8 * (((byte)authTagSz - 2) / 2)) + (lenSz - 1); - for (i = 0; i < lenSz; i++) - B[AES_BLOCK_SIZE - 1 - i] = (inSz >> (8 * i)) & 0xFF; + for (i = 0; i < lenSz; i++) { + if (mask && i >= wordSz) + mask = 0x00; + B[AES_BLOCK_SIZE - 1 - i] = (inSz >> ((8 * i) & mask)) & mask; + } + + wc_AesEncrypt(aes, B, A); - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, A); - #else - wc_AesEncrypt(aes, B, A); - #endif if (authInSz > 0) roll_auth(aes, authIn, authInSz, A); if (inSz > 0) @@ -3742,11 +7352,7 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, B[0] = lenSz - 1; for (i = 0; i < lenSz; i++) B[AES_BLOCK_SIZE - 1 - i] = 0; - #ifdef FREESCALE_MMCAU - cau_aes_encrypt(B, key, aes->rounds, B); - #else - wc_AesEncrypt(aes, B, B); - #endif + wc_AesEncrypt(aes, B, B); xorbuf(A, B, authTagSz); if (ConstantCompare(A, authTag, authTagSz) != 0) { @@ -3764,134 +7370,1335 @@ int wc_AesCcmDecrypt(Aes* aes, byte* out, const byte* in, word32 inSz, return result; } -#endif /* HAVE_AESCCM */ +#endif /* HAVE_AES_DECRYPT */ +#endif /* software CCM */ +/* abstract functions that call lower level AESCCM functions */ +#ifndef WC_NO_RNG -#ifdef HAVE_CAVIUM +int wc_AesCcmSetNonce(Aes* aes, const byte* nonce, word32 nonceSz) +{ + int ret = 0; + + if (aes == NULL || nonce == NULL || + nonceSz < CCM_NONCE_MIN_SZ || nonceSz > CCM_NONCE_MAX_SZ) { -#include <wolfssl/ctaocrypt/logging.h> -#include "cavium_common.h" + ret = BAD_FUNC_ARG; + } -/* Initiliaze Aes for use with 
Nitrox device */ -int wc_AesInitCavium(Aes* aes, int devId) + if (ret == 0) { + XMEMCPY(aes->reg, nonce, nonceSz); + aes->nonceSz = nonceSz; + + /* Invocation counter should be 2^61 */ + aes->invokeCtr[0] = 0; + aes->invokeCtr[1] = 0xE0000000; + } + + return ret; +} + + +int wc_AesCcmEncrypt_ex(Aes* aes, byte* out, const byte* in, word32 sz, + byte* ivOut, word32 ivOutSz, + byte* authTag, word32 authTagSz, + const byte* authIn, word32 authInSz) { + int ret = 0; + + if (aes == NULL || out == NULL || + (in == NULL && sz != 0) || + ivOut == NULL || + (authIn == NULL && authInSz != 0) || + (ivOutSz != aes->nonceSz)) { + + ret = BAD_FUNC_ARG; + } + + if (ret == 0) { + aes->invokeCtr[0]++; + if (aes->invokeCtr[0] == 0) { + aes->invokeCtr[1]++; + if (aes->invokeCtr[1] == 0) + ret = AES_CCM_OVERFLOW_E; + } + } + + if (ret == 0) { + ret = wc_AesCcmEncrypt(aes, out, in, sz, + (byte*)aes->reg, aes->nonceSz, + authTag, authTagSz, + authIn, authInSz); + if (ret == 0) { + XMEMCPY(ivOut, aes->reg, aes->nonceSz); + IncCtr((byte*)aes->reg, aes->nonceSz); + } + } + + return ret; +} + +#endif /* WC_NO_RNG */ + +#endif /* HAVE_AESCCM */ + + +/* Initialize Aes for use with async hardware */ +int wc_AesInit(Aes* aes, void* heap, int devId) +{ + int ret = 0; + if (aes == NULL) - return -1; + return BAD_FUNC_ARG; - if (CspAllocContext(CONTEXT_SSL, &aes->contextHandle, devId) != 0) - return -1; + aes->heap = heap; +#ifdef WOLF_CRYPTO_CB aes->devId = devId; - aes->magic = WOLFSSL_AES_CAVIUM_MAGIC; - - return 0; + aes->devCtx = NULL; +#else + (void)devId; +#endif +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + ret = wolfAsync_DevCtxInit(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES, + aes->heap, devId); +#endif /* WOLFSSL_ASYNC_CRYPT */ + +#ifdef WOLFSSL_AFALG + aes->alFd = -1; + aes->rdFd = -1; +#endif +#if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + aes->ctx.cfd = -1; +#endif +#if defined(WOLFSSL_CRYPTOCELL) && 
defined(WOLFSSL_CRYPTOCELL_AES) + XMEMSET(&aes->ctx, 0, sizeof(aes->ctx)); +#endif +#ifdef HAVE_AESGCM +#ifdef OPENSSL_EXTRA + XMEMSET(aes->aadH, 0, sizeof(aes->aadH)); + aes->aadLen = 0; +#endif +#endif + return ret; } +#ifdef HAVE_PKCS11 +int wc_AesInit_Id(Aes* aes, unsigned char* id, int len, void* heap, int devId) +{ + int ret = 0; -/* Free Aes from use with Nitrox device */ -void wc_AesFreeCavium(Aes* aes) + if (aes == NULL) + ret = BAD_FUNC_ARG; + if (ret == 0 && (len < 0 || len > AES_MAX_ID_LEN)) + ret = BUFFER_E; + + if (ret == 0) + ret = wc_AesInit(aes, heap, devId); + if (ret == 0) { + XMEMCPY(aes->id, id, len); + aes->idLen = len; + } + + return ret; +} +#endif + +/* Free Aes from use with async hardware */ +void wc_AesFree(Aes* aes) { if (aes == NULL) return; - if (aes->magic != WOLFSSL_AES_CAVIUM_MAGIC) - return; +#if defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES) + wolfAsync_DevCtxFree(&aes->asyncDev, WOLFSSL_ASYNC_MARKER_AES); +#endif /* WOLFSSL_ASYNC_CRYPT */ +#if defined(WOLFSSL_AFALG) || defined(WOLFSSL_AFALG_XILINX_AES) + if (aes->rdFd > 0) { /* negative is error case */ + close(aes->rdFd); + } + if (aes->alFd > 0) { + close(aes->alFd); + } +#endif /* WOLFSSL_AFALG */ +#if defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC)) + wc_DevCryptoFree(&aes->ctx); +#endif +#if defined(WOLF_CRYPTO_CB) || (defined(WOLFSSL_DEVCRYPTO) && \ + (defined(WOLFSSL_DEVCRYPTO_AES) || defined(WOLFSSL_DEVCRYPTO_CBC))) || \ + (defined(WOLFSSL_ASYNC_CRYPT) && defined(WC_ASYNC_ENABLE_AES)) + ForceZero((byte*)aes->devKey, AES_MAX_KEY_SIZE/WOLFSSL_BIT_SIZE); +#endif +} + + +int wc_AesGetKeySize(Aes* aes, word32* keySize) +{ + int ret = 0; + + if (aes == NULL || keySize == NULL) { + return BAD_FUNC_ARG; + } +#if defined(WOLFSSL_CRYPTOCELL) && defined(WOLFSSL_CRYPTOCELL_AES) + *keySize = aes->ctx.key.keySize; + return ret; +#endif + switch (aes->rounds) { +#ifdef WOLFSSL_AES_128 + case 10: + *keySize = 16; + break; 
+#endif +#ifdef WOLFSSL_AES_192 + case 12: + *keySize = 24; + break; +#endif +#ifdef WOLFSSL_AES_256 + case 14: + *keySize = 32; + break; +#endif + default: + *keySize = 0; + ret = BAD_FUNC_ARG; + } + + return ret; +} + +#endif /* !WOLFSSL_TI_CRYPT */ + +#ifdef HAVE_AES_ECB +#if defined(WOLFSSL_IMX6_CAAM) && !defined(NO_IMX6_CAAM_AES) + /* implemented in wolfcrypt/src/port/caam/caam_aes.c */ - CspFreeContext(CONTEXT_SSL, aes->contextHandle, aes->devId); - aes->magic = 0; +#elif defined(WOLFSSL_AFALG) + /* implemented in wolfcrypt/src/port/af_alg/afalg_aes.c */ + +#elif defined(WOLFSSL_DEVCRYPTO_AES) + /* implemented in wolfcrypt/src/port/devcrypt/devcrypto_aes.c */ + +#elif defined(WOLFSSL_SCE) && !defined(WOLFSSL_SCE_NO_AES) + +/* Software AES - ECB */ +int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + + return AES_ECB_encrypt(aes, in, out, sz); } -static int wc_AesCaviumSetKey(Aes* aes, const byte* key, word32 length, - const byte* iv) +int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - if (aes == NULL) - return -1; + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; - XMEMCPY(aes->key, key, length); /* key still holds key, iv still in reg */ - if (length == 16) - aes->type = AES_128; - else if (length == 24) - aes->type = AES_192; - else if (length == 32) - aes->type = AES_256; + return AES_ECB_decrypt(aes, in, out, sz); +} - return wc_AesSetIV(aes, iv); +#else + +/* Software AES - ECB */ +int wc_AesEcbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + word32 blocks = sz / AES_BLOCK_SIZE; + + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + while (blocks>0) { + wc_AesEncryptDirect(aes, out, in); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + return 0; } -static int AesCaviumCbcEncrypt(Aes* aes, byte* out, const byte* in, - word32 
length) +int wc_AesEcbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) { - wolfssl_word offset = 0; - word32 requestId; + word32 blocks = sz / AES_BLOCK_SIZE; - while (length > WOLFSSL_MAX_16BIT) { - word16 slen = (word16)WOLFSSL_MAX_16BIT; - if (CspEncryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE, - aes->type, slen, (byte*)in + offset, out + offset, - (byte*)aes->reg, (byte*)aes->key, &requestId, - aes->devId) != 0) { - WOLFSSL_MSG("Bad Cavium Aes Encrypt"); - return -1; + if ((in == NULL) || (out == NULL) || (aes == NULL)) + return BAD_FUNC_ARG; + while (blocks>0) { + wc_AesDecryptDirect(aes, out, in); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + return 0; +} +#endif +#endif /* HAVE_AES_ECB */ + +#if defined(WOLFSSL_AES_CFB) || defined(WOLFSSL_AES_OFB) +/* Feedback AES mode + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * mode flag to specify AES mode + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Encrypt */ +static int wc_AesFeedbackEncrypt(Aes* aes, byte* out, const byte* in, + word32 sz, byte mode) +{ + byte* tmp = NULL; +#ifdef WOLFSSL_AES_CFB + byte* reg = NULL; +#endif + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + +#ifdef WOLFSSL_AES_CFB + if (aes->left && sz) { + reg = (byte*)aes->reg + AES_BLOCK_SIZE - aes->left; + } +#endif + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out) = *(in++) ^ *(tmp++); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + *(reg++) = *out; } - length -= WOLFSSL_MAX_16BIT; - offset += WOLFSSL_MAX_16BIT; - XMEMCPY(aes->reg, out + offset - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + #endif + out++; + aes->left--; + sz--; } - if (length) { - 
word16 slen = (word16)length; - if (CspEncryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE, - aes->type, slen, (byte*)in + offset, out + offset, - (byte*)aes->reg, (byte*)aes->key, &requestId, - aes->devId) != 0) { - WOLFSSL_MSG("Bad Cavium Aes Encrypt"); - return -1; + + while (sz >= AES_BLOCK_SIZE) { + /* Using aes->tmp here for inline case i.e. in=out */ + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); } - XMEMCPY(aes->reg, out + offset+length - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + #endif + xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + XMEMCPY(out, aes->tmp, AES_BLOCK_SIZE); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + aes->left = 0; } + + /* encrypt left over data */ + if (sz) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + #ifdef WOLFSSL_AES_CFB + reg = (byte*)aes->reg; + #endif + + while (sz--) { + *(out) = *(in++) ^ *(tmp++); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + *(reg++) = *out; + } + #endif + out++; + aes->left--; + } + } + return 0; } -static int AesCaviumCbcDecrypt(Aes* aes, byte* out, const byte* in, - word32 length) + +#ifdef HAVE_AES_DECRYPT +/* CFB 128 + * + * aes structure holding key to use for decryption + * out buffer to hold result of decryption (must be at least as large as input + * buffer) + * in buffer to decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Decrypt */ +static int wc_AesFeedbackDecrypt(Aes* aes, byte* out, const byte* in, word32 sz, + byte mode) { - word32 requestId; - 
wolfssl_word offset = 0; + byte* tmp; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + #ifdef WOLFSSL_AES_CFB + /* check if more input needs copied over to aes->reg */ + if (aes->left && sz && mode == AES_CFB_MODE) { + int size = min(aes->left, sz); + XMEMCPY((byte*)aes->reg + AES_BLOCK_SIZE - aes->left, in, size); + } + #endif + + /* consume any unused bytes left in aes->tmp */ + tmp = (byte*)aes->tmp + AES_BLOCK_SIZE - aes->left; + while (aes->left && sz) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; + sz--; + } - while (length > WOLFSSL_MAX_16BIT) { - word16 slen = (word16)WOLFSSL_MAX_16BIT; - XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - if (CspDecryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE, - aes->type, slen, (byte*)in + offset, out + offset, - (byte*)aes->reg, (byte*)aes->key, &requestId, - aes->devId) != 0) { - WOLFSSL_MSG("Bad Cavium Aes Decrypt"); - return -1; + while (sz > AES_BLOCK_SIZE) { + /* Using aes->tmp here for inline case i.e. 
in=out */ + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY((byte*)aes->reg, (byte*)aes->tmp, AES_BLOCK_SIZE); + } + #endif + xorbuf((byte*)aes->tmp, in, AES_BLOCK_SIZE); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, in, AES_BLOCK_SIZE); } - length -= WOLFSSL_MAX_16BIT; - offset += WOLFSSL_MAX_16BIT; - XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + #endif + XMEMCPY(out, (byte*)aes->tmp, AES_BLOCK_SIZE); + out += AES_BLOCK_SIZE; + in += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + aes->left = 0; } - if (length) { - word16 slen = (word16)length; - XMEMCPY(aes->tmp, in + offset + slen - AES_BLOCK_SIZE, AES_BLOCK_SIZE); - if (CspDecryptAes(CAVIUM_BLOCKING, aes->contextHandle, CAVIUM_NO_UPDATE, - aes->type, slen, (byte*)in + offset, out + offset, - (byte*)aes->reg, (byte*)aes->key, &requestId, - aes->devId) != 0) { - WOLFSSL_MSG("Bad Cavium Aes Decrypt"); - return -1; + + /* decrypt left over data */ + if (sz) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + #ifdef WOLFSSL_AES_CFB + if (mode == AES_CFB_MODE) { + XMEMCPY(aes->reg, in, sz); + } + #endif + #ifdef WOLFSSL_AES_OFB + if (mode == AES_OFB_MODE) { + XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); + } + #endif + + aes->left = AES_BLOCK_SIZE; + tmp = (byte*)aes->tmp; + + while (sz--) { + *(out++) = *(in++) ^ *(tmp++); + aes->left--; } - XMEMCPY(aes->reg, aes->tmp, AES_BLOCK_SIZE); } + return 0; } +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_CFB */ -#endif /* HAVE_CAVIUM */ +#ifdef WOLFSSL_AES_CFB +/* CFB 128 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Encrypt */ +int wc_AesCfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return 
wc_AesFeedbackEncrypt(aes, out, in, sz, AES_CFB_MODE); +} -#endif /* WOLFSSL_TI_CRYPT */ -#endif /* HAVE_FIPS */ +#ifdef HAVE_AES_DECRYPT +/* CFB 128 + * + * aes structure holding key to use for decryption + * out buffer to hold result of decryption (must be at least as large as input + * buffer) + * in buffer to decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - CFB Decrypt */ +int wc_AesCfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_CFB_MODE); +} +#endif /* HAVE_AES_DECRYPT */ + + +/* shift the whole AES_BLOCK_SIZE array left by 8 or 1 bits */ +static void shiftLeftArray(byte* ary, byte shift) +{ + int i; + + if (shift == WOLFSSL_BIT_SIZE) { + /* shifting over by 8 bits */ + for (i = 0; i < AES_BLOCK_SIZE - 1; i++) { + ary[i] = ary[i+1]; + } + ary[i] = 0; + } + else { + byte carry = 0; + + /* shifting over by 7 or less bits */ + for (i = 0; i < AES_BLOCK_SIZE - 1; i++) { + carry = ary[i+1] & (0XFF << (WOLFSSL_BIT_SIZE - shift)); + carry >>= (WOLFSSL_BIT_SIZE - shift); + ary[i] = (ary[i] << shift) + carry; + } + ary[i] = ary[i] << shift; + } +} + + +/* returns 0 on success and negative values on failure */ +static int wc_AesFeedbackCFB8(Aes* aes, byte* out, const byte* in, + word32 sz, byte dir) +{ + byte *pt; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (sz == 0) { + return 0; + } + + while (sz > 0) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + if (dir == AES_DECRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray(pt, WOLFSSL_BIT_SIZE); + pt[AES_BLOCK_SIZE - 1] = in[0]; + } + + /* MSB + XOR */ + out[0] = aes->tmp[0] ^ in[0]; + if (dir == AES_ENCRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray(pt, WOLFSSL_BIT_SIZE); + pt[AES_BLOCK_SIZE - 1] = out[0]; + } + + out += 1; + in += 1; + sz -= 1; + } + + return 0; +} + + +/* 
returns 0 on success and negative values on failure */ +static int wc_AesFeedbackCFB1(Aes* aes, byte* out, const byte* in, + word32 sz, byte dir) +{ + byte tmp; + byte cur = 0; /* hold current work in order to handle inline in=out */ + byte* pt; + int bit = 7; + + if (aes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + if (sz == 0) { + return 0; + } + + while (sz > 0) { + wc_AesEncryptDirect(aes, (byte*)aes->tmp, (byte*)aes->reg); + if (dir == AES_DECRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + tmp = (0X01 << bit) & in[0]; + tmp = tmp >> bit; + tmp &= 0x01; + shiftLeftArray((byte*)aes->reg, 1); + pt[AES_BLOCK_SIZE - 1] |= tmp; + } -#endif /* NO_AES */ + /* MSB + XOR */ + tmp = (0X01 << bit) & in[0]; + pt = (byte*)aes->tmp; + tmp = (pt[0] >> 7) ^ (tmp >> bit); + tmp &= 0x01; + cur |= (tmp << bit); + + + if (dir == AES_ENCRYPTION) { + pt = (byte*)aes->reg; + + /* LSB + CAT */ + shiftLeftArray((byte*)aes->reg, 1); + pt[AES_BLOCK_SIZE - 1] |= tmp; + } + + bit--; + if (bit < 0) { + out[0] = cur; + out += 1; + in += 1; + sz -= 1; + bit = 7; + cur = 0; + } + else { + sz -= 1; + } + } + + if (bit > 0 && bit < 7) { + out[0] = cur; + } + + return 0; +} + + +/* CFB 1 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt (packed to left, i.e. 
101 is 0x90) + * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8) + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb1Encrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB1(aes, out, in, sz, AES_ENCRYPTION); +} + + +/* CFB 8 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb8Encrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB8(aes, out, in, sz, AES_ENCRYPTION); +} +#ifdef HAVE_AES_DECRYPT + +/* CFB 1 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer in bits (0x1 would be size of 1 and 0xFF size of 8) + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb1Decrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB1(aes, out, in, sz, AES_DECRYPTION); +} + + +/* CFB 8 + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative values on failure + */ +int wc_AesCfb8Decrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackCFB8(aes, out, in, sz, AES_DECRYPTION); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_CFB */ + +#ifdef WOLFSSL_AES_OFB +/* OFB + * + * aes structure holding key to use for encryption + * out buffer to hold result of encryption (must be at least as large as input + * buffer) + * in buffer to encrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ 
+/* Software AES - OFB Encrypt */ +int wc_AesOfbEncrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackEncrypt(aes, out, in, sz, AES_OFB_MODE); +} + + +#ifdef HAVE_AES_DECRYPT +/* OFB + * + * aes structure holding key to use for decryption + * out buffer to hold result of decryption (must be at least as large as input + * buffer) + * in buffer to decrypt + * sz size of input buffer + * + * returns 0 on success and negative error values on failure + */ +/* Software AES - OFB Decrypt */ +int wc_AesOfbDecrypt(Aes* aes, byte* out, const byte* in, word32 sz) +{ + return wc_AesFeedbackDecrypt(aes, out, in, sz, AES_OFB_MODE); +} +#endif /* HAVE_AES_DECRYPT */ +#endif /* WOLFSSL_AES_OFB */ + + +#ifdef HAVE_AES_KEYWRAP + +/* Initialize key wrap counter with value */ +static WC_INLINE void InitKeyWrapCounter(byte* inOutCtr, word32 value) +{ + int i; + word32 bytes; + + bytes = sizeof(word32); + for (i = 0; i < (int)sizeof(word32); i++) { + inOutCtr[i+sizeof(word32)] = (value >> ((bytes - 1) * 8)) & 0xFF; + bytes--; + } +} + +/* Increment key wrap counter */ +static WC_INLINE void IncrementKeyWrapCounter(byte* inOutCtr) +{ + int i; + + /* in network byte order so start at end and work back */ + for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) { + if (++inOutCtr[i]) /* we're done unless we overflow */ + return; + } +} + +/* Decrement key wrap counter */ +static WC_INLINE void DecrementKeyWrapCounter(byte* inOutCtr) +{ + int i; + + for (i = KEYWRAP_BLOCK_SIZE - 1; i >= 0; i--) { + if (--inOutCtr[i] != 0xFF) /* we're done unless we underflow */ + return; + } +} + +/* perform AES key wrap (RFC3394), return out sz on success, negative on err */ +int wc_AesKeyWrap(const byte* key, word32 keySz, const byte* in, word32 inSz, + byte* out, word32 outSz, const byte* iv) +{ + Aes aes; + byte* r; + word32 i; + int ret, j; + + byte t[KEYWRAP_BLOCK_SIZE]; + byte tmp[AES_BLOCK_SIZE]; + + /* n must be at least 2, output size is n + 8 bytes */ + if (key == NULL || in ==
NULL || inSz < 2 || + out == NULL || outSz < (inSz + KEYWRAP_BLOCK_SIZE)) + return BAD_FUNC_ARG; + + /* input must be multiple of 64-bits */ + if (inSz % KEYWRAP_BLOCK_SIZE != 0) + return BAD_FUNC_ARG; + + /* user IV is optional */ + if (iv == NULL) { + XMEMSET(tmp, 0xA6, KEYWRAP_BLOCK_SIZE); + } else { + XMEMCPY(tmp, iv, KEYWRAP_BLOCK_SIZE); + } + + r = out + 8; + XMEMCPY(r, in, inSz); + XMEMSET(t, 0, sizeof(t)); + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret != 0) + return ret; + + ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_ENCRYPTION); + if (ret != 0) + return ret; + + for (j = 0; j <= 5; j++) { + for (i = 1; i <= inSz / KEYWRAP_BLOCK_SIZE; i++) { + + /* load R[i] */ + XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE); + + wc_AesEncryptDirect(&aes, tmp, tmp); + + /* calculate new A */ + IncrementKeyWrapCounter(t); + xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE); + + /* save R[i] */ + XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE); + r += KEYWRAP_BLOCK_SIZE; + } + r = out + KEYWRAP_BLOCK_SIZE; + } + + /* C[0] = A */ + XMEMCPY(out, tmp, KEYWRAP_BLOCK_SIZE); + + wc_AesFree(&aes); + + return inSz + KEYWRAP_BLOCK_SIZE; +} + +int wc_AesKeyUnWrap(const byte* key, word32 keySz, const byte* in, word32 inSz, + byte* out, word32 outSz, const byte* iv) +{ + Aes aes; + byte* r; + word32 i, n; + int ret, j; + + byte t[KEYWRAP_BLOCK_SIZE]; + byte tmp[AES_BLOCK_SIZE]; + + const byte* expIv; + const byte defaultIV[] = { + 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6, 0xA6 + }; + + (void)iv; + + if (key == NULL || in == NULL || inSz < 3 || + out == NULL || outSz < (inSz - KEYWRAP_BLOCK_SIZE)) + return BAD_FUNC_ARG; + + /* input must be multiple of 64-bits */ + if (inSz % KEYWRAP_BLOCK_SIZE != 0) + return BAD_FUNC_ARG; + + /* user IV optional */ + if (iv != NULL) { + expIv = iv; + } else { + expIv = defaultIV; + } + + /* A = C[0], R[i] = C[i] */ + XMEMCPY(tmp, in, KEYWRAP_BLOCK_SIZE); + XMEMCPY(out, in + KEYWRAP_BLOCK_SIZE, inSz - KEYWRAP_BLOCK_SIZE); + 
+ XMEMSET(t, 0, sizeof(t)); + + ret = wc_AesInit(&aes, NULL, INVALID_DEVID); + if (ret != 0) + return ret; + + ret = wc_AesSetKey(&aes, key, keySz, NULL, AES_DECRYPTION); + if (ret != 0) + return ret; + + /* initialize counter to 6n */ + n = (inSz - 1) / KEYWRAP_BLOCK_SIZE; + InitKeyWrapCounter(t, 6 * n); + + for (j = 5; j >= 0; j--) { + for (i = n; i >= 1; i--) { + + /* calculate A */ + xorbuf(tmp, t, KEYWRAP_BLOCK_SIZE); + DecrementKeyWrapCounter(t); + + /* load R[i], starting at end of R */ + r = out + ((i - 1) * KEYWRAP_BLOCK_SIZE); + XMEMCPY(tmp + KEYWRAP_BLOCK_SIZE, r, KEYWRAP_BLOCK_SIZE); + wc_AesDecryptDirect(&aes, tmp, tmp); + + /* save R[i] */ + XMEMCPY(r, tmp + KEYWRAP_BLOCK_SIZE, KEYWRAP_BLOCK_SIZE); + } + } + + wc_AesFree(&aes); + + /* verify IV */ + if (XMEMCMP(tmp, expIv, KEYWRAP_BLOCK_SIZE) != 0) + return BAD_KEYWRAP_IV_E; + + return inSz - KEYWRAP_BLOCK_SIZE; +} + +#endif /* HAVE_AES_KEYWRAP */ + +#ifdef WOLFSSL_AES_XTS + +/* Galois Field to use */ +#define GF_XTS 0x87 + +/* This is to help with setting keys to correct encrypt or decrypt type. + * + * tweak AES key for tweak in XTS + * aes AES key for encrypt/decrypt process + * key buffer holding aes key | tweak key + * len length of key buffer in bytes. Should be twice that of key size. i.e. + * 32 for a 16 byte key. + * dir direction, either AES_ENCRYPTION or AES_DECRYPTION + * heap heap hint to use for memory. Can be NULL + * devId id to use with async crypto. Can be 0 + * + * Note: is up to user to call wc_AesFree on tweak and aes key when done.
+ * + * return 0 on success + */ +int wc_AesXtsSetKey(XtsAes* aes, const byte* key, word32 len, int dir, + void* heap, int devId) +{ + word32 keySz; + int ret = 0; + + if (aes == NULL || key == NULL) { + return BAD_FUNC_ARG; + } + + if ((ret = wc_AesInit(&aes->tweak, heap, devId)) != 0) { + return ret; + } + if ((ret = wc_AesInit(&aes->aes, heap, devId)) != 0) { + return ret; + } + + keySz = len/2; + if (keySz != 16 && keySz != 32) { + WOLFSSL_MSG("Unsupported key size"); + return WC_KEY_SIZE_E; + } + + if ((ret = wc_AesSetKey(&aes->aes, key, keySz, NULL, dir)) == 0) { + ret = wc_AesSetKey(&aes->tweak, key + keySz, keySz, NULL, + AES_ENCRYPTION); + if (ret != 0) { + wc_AesFree(&aes->aes); + } + } + + return ret; +} + + +/* This is used to free up resources used by Aes structs + * + * aes AES keys to free + * + * return 0 on success + */ +int wc_AesXtsFree(XtsAes* aes) +{ + if (aes != NULL) { + wc_AesFree(&aes->aes); + wc_AesFree(&aes->tweak); + } + + return 0; +} + + +/* Same process as wc_AesXtsEncrypt but uses a word64 type as the tweak value + * instead of a byte array. This just converts the word64 to a byte array and + * calls wc_AesXtsEncrypt. + * + * aes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers + * sector value to use for tweak + * + * returns 0 on success + */ +int wc_AesXtsEncryptSector(XtsAes* aes, byte* out, const byte* in, + word32 sz, word64 sector) +{ + byte* pt; + byte i[AES_BLOCK_SIZE]; + + XMEMSET(i, 0, AES_BLOCK_SIZE); +#ifdef BIG_ENDIAN_ORDER + sector = ByteReverseWord64(sector); +#endif + pt = (byte*)&sector; + XMEMCPY(i, pt, sizeof(word64)); + + return wc_AesXtsEncrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE); +} + + +/* Same process as wc_AesXtsDecrypt but uses a word64 type as the tweak value + * instead of a byte array. This just converts the word64 to a byte array.
+ * + * aes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to encrypt + * sz size of both out and in buffers + * sector value to use for tweak + * + * returns 0 on success + */ +int wc_AesXtsDecryptSector(XtsAes* aes, byte* out, const byte* in, word32 sz, + word64 sector) +{ + byte* pt; + byte i[AES_BLOCK_SIZE]; + + XMEMSET(i, 0, AES_BLOCK_SIZE); +#ifdef BIG_ENDIAN_ORDER + sector = ByteReverseWord64(sector); +#endif + pt = (byte*)&sector; + XMEMCPY(i, pt, sizeof(word64)); + + return wc_AesXtsDecrypt(aes, out, in, sz, (const byte*)i, AES_BLOCK_SIZE); +} + +#ifdef HAVE_AES_ECB +/* helper function for encrypting / decrypting full buffer at once */ +static int _AesXtsHelper(Aes* aes, byte* out, const byte* in, word32 sz, int dir) +{ + word32 outSz = sz; + word32 totalSz = (sz / AES_BLOCK_SIZE) * AES_BLOCK_SIZE; /* total bytes */ + byte* pt = out; + + outSz -= AES_BLOCK_SIZE; + + while (outSz > 0) { + word32 j; + byte carry = 0; + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE && outSz > 0; j++, outSz--) { + byte tmpC; + + tmpC = (pt[j] >> 7) & 0x01; + pt[j+AES_BLOCK_SIZE] = ((pt[j] << 1) + carry) & 0xFF; + carry = tmpC; + } + if (carry) { + pt[AES_BLOCK_SIZE] ^= GF_XTS; + } + + pt += AES_BLOCK_SIZE; + } + + xorbuf(out, in, totalSz); + if (dir == AES_ENCRYPTION) { + return wc_AesEcbEncrypt(aes, out, out, totalSz); + } + else { + return wc_AesEcbDecrypt(aes, out, out, totalSz); + } +} +#endif /* HAVE_AES_ECB */ + + +/* AES with XTS mode. (XTS) XEX encryption with Tweak and cipher text Stealing. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold cipher text + * in input plain text buffer to encrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function.
+ * + * returns 0 on success + */ +/* Software AES - XTS Encrypt */ +int wc_AesXtsEncrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + Aes *aes, *tweak; + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + aes = &xaes->aes; + tweak = &xaes->tweak; + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks > 0) { + byte tmp[AES_BLOCK_SIZE]; + + XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES + * key setup passed to encrypt direct*/ + + wc_AesEncryptDirect(tweak, tmp, i); + + #ifdef HAVE_AES_ECB + /* encrypt all of buffer at once when possible */ + if (in != out) { /* can not handle inline */ + XMEMCPY(out, tmp, AES_BLOCK_SIZE); + if ((ret = _AesXtsHelper(aes, out, in, sz, AES_ENCRYPTION)) != 0) { + return ret; + } + } + #endif + + while (blocks > 0) { + word32 j; + byte carry = 0; + byte buf[AES_BLOCK_SIZE]; + + #ifdef HAVE_AES_ECB + if (in == out) { /* check for if inline */ + #endif + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesEncryptDirect(aes, out, buf); + #ifdef HAVE_AES_ECB + } + #endif + xorbuf(out, tmp, AES_BLOCK_SIZE); + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tmp[j] >> 7) & 0x01; + tmp[j] = ((tmp[j] << 1) + carry) & 0xFF; + carry = tmpC; + } + if (carry) { + tmp[0] ^= GF_XTS; + } + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + + /* stealing operation of XTS to handle left overs */ + if (sz > 0) { + byte buf[AES_BLOCK_SIZE]; + + XMEMCPY(buf, out - AES_BLOCK_SIZE, AES_BLOCK_SIZE); + if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ + return BUFFER_E; + } + XMEMCPY(out, buf, sz); + XMEMCPY(buf, in, sz); + + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesEncryptDirect(aes, out - AES_BLOCK_SIZE, buf); + xorbuf(out - AES_BLOCK_SIZE, 
tmp, AES_BLOCK_SIZE); + } + } + else { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + return ret; +} + + +/* Same process as encryption but Aes key is AES_DECRYPTION type. + * + * xaes AES keys to use for block encrypt/decrypt + * out output buffer to hold plain text + * in input cipher text buffer to decrypt + * sz size of both out and in buffers + * i value to use for tweak + * iSz size of i buffer, should always be AES_BLOCK_SIZE but having this input + * adds a sanity check on how the user calls the function. + * + * returns 0 on success + */ +/* Software AES - XTS Decrypt */ +int wc_AesXtsDecrypt(XtsAes* xaes, byte* out, const byte* in, word32 sz, + const byte* i, word32 iSz) +{ + int ret = 0; + word32 blocks = (sz / AES_BLOCK_SIZE); + Aes *aes, *tweak; + + if (xaes == NULL || out == NULL || in == NULL) { + return BAD_FUNC_ARG; + } + + aes = &xaes->aes; + tweak = &xaes->tweak; + + if (iSz < AES_BLOCK_SIZE) { + return BAD_FUNC_ARG; + } + + if (blocks > 0) { + word32 j; + byte carry = 0; + byte tmp[AES_BLOCK_SIZE]; + byte stl = (sz % AES_BLOCK_SIZE); + + XMEMSET(tmp, 0, AES_BLOCK_SIZE); /* set to 0's in case of improper AES + * key setup passed to decrypt direct*/ + + wc_AesEncryptDirect(tweak, tmp, i); + + /* if Stealing then break out of loop one block early to handle special + * case */ + if (stl > 0) { + blocks--; + } + + #ifdef HAVE_AES_ECB + /* decrypt all of buffer at once when possible */ + if (in != out) { /* can not handle inline */ + XMEMCPY(out, tmp, AES_BLOCK_SIZE); + if ((ret = _AesXtsHelper(aes, out, in, sz, AES_DECRYPTION)) != 0) { + return ret; + } + } + #endif + + while (blocks > 0) { + byte buf[AES_BLOCK_SIZE]; + + #ifdef HAVE_AES_ECB + if (in == out) { /* check for if inline */ + #endif + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesDecryptDirect(aes, out, buf); + #ifdef HAVE_AES_ECB + } + #endif + xorbuf(out, tmp, AES_BLOCK_SIZE); + + /* multiply by shift left and 
propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tmp[j] >> 7) & 0x01; + tmp[j] = ((tmp[j] << 1) + carry) & 0xFF; + carry = tmpC; + } + if (carry) { + tmp[0] ^= GF_XTS; + } + carry = 0; + + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + blocks--; + } + + /* stealing operation of XTS to handle left overs */ + if (sz > 0) { + byte buf[AES_BLOCK_SIZE]; + byte tmp2[AES_BLOCK_SIZE]; + + /* multiply by shift left and propagate carry */ + for (j = 0; j < AES_BLOCK_SIZE; j++) { + byte tmpC; + + tmpC = (tmp[j] >> 7) & 0x01; + tmp2[j] = ((tmp[j] << 1) + carry) & 0xFF; + carry = tmpC; + } + if (carry) { + tmp2[0] ^= GF_XTS; + } + + XMEMCPY(buf, in, AES_BLOCK_SIZE); + xorbuf(buf, tmp2, AES_BLOCK_SIZE); + wc_AesDecryptDirect(aes, out, buf); + xorbuf(out, tmp2, AES_BLOCK_SIZE); + + /* tmp2 holds partial | last */ + XMEMCPY(tmp2, out, AES_BLOCK_SIZE); + in += AES_BLOCK_SIZE; + out += AES_BLOCK_SIZE; + sz -= AES_BLOCK_SIZE; + + /* Make buffer with end of cipher text | last */ + XMEMCPY(buf, tmp2, AES_BLOCK_SIZE); + if (sz >= AES_BLOCK_SIZE) { /* extra sanity check before copy */ + return BUFFER_E; + } + XMEMCPY(buf, in, sz); + XMEMCPY(out, tmp2, sz); + + xorbuf(buf, tmp, AES_BLOCK_SIZE); + wc_AesDecryptDirect(aes, tmp2, buf); + xorbuf(tmp2, tmp, AES_BLOCK_SIZE); + XMEMCPY(out - AES_BLOCK_SIZE, tmp2, AES_BLOCK_SIZE); + } + } + else { + WOLFSSL_MSG("Plain text input too small for encryption"); + return BAD_FUNC_ARG; + } + + return ret; +} + +#endif /* WOLFSSL_AES_XTS */ + +#endif /* HAVE_FIPS */ +#endif /* !NO_AES */ |