diff options
author | Anna Weine <anna.weine@mozilla.com> | 2023-04-17 08:45:04 +0000 |
---|---|---|
committer | Anna Weine <anna.weine@mozilla.com> | 2023-04-17 08:45:04 +0000 |
commit | ee51da79edae57f2de4853e560a7358df61b274f (patch) | |
tree | 1766fee1034006c24df94acbda0df227112ff6ef | |
parent | 8c3a9f0bbc2a9c30abdf25aaf87d65f37284bda4 (diff) | |
download | nss-hg-ee51da79edae57f2de4853e560a7358df61b274f.tar.gz |
Bug 1727555 - Update HACL* till 51a72a953a4ee6f91e63b2816ae5c4e62edf35d6 r=nss-reviewers,jschanck
Differential Revision: https://phabricator.services.mozilla.com/D158327
62 files changed, 4701 insertions, 1394 deletions
diff --git a/automation/taskcluster/scripts/run_hacl.sh b/automation/taskcluster/scripts/run_hacl.sh index 7b82c911b..e414b9aa5 100755 --- a/automation/taskcluster/scripts/run_hacl.sh +++ b/automation/taskcluster/scripts/run_hacl.sh @@ -11,22 +11,34 @@ set -e -x -v # The docker image this is running in has NSS sources. # Get the HACL* source, containing a snapshot of the C code, extracted on the # HACL CI. -# When bug 1593647 is resolved, extract the code on CI again. -git clone -q "https://github.com/project-everest/hacl-star" ~/hacl-star -git -C ~/hacl-star checkout -q c95ab70fcb2bc21025d8845281bc4bc8987ca683 +git clone -q "https://github.com/hacl-star/hacl-star" ~/hacl-star +git -C ~/hacl-star checkout -q 51a72a953a4ee6f91e63b2816ae5c4e62edf35d6 # Format the C snapshot. cd ~/hacl-star/dist/mozilla cp ~/nss/.clang-format . find . -type f -name '*.[ch]' -exec clang-format -i {} \+ -cd ~/hacl-star/dist/kremlin +cd ~/hacl-star/dist/karamel cp ~/nss/.clang-format . find . -type f -name '*.[ch]' -exec clang-format -i {} \+ # These diff commands will return 1 if there are differences and stop the script. -files=($(find ~/nss/lib/freebl/verified/ -type f -name '*.[ch]')) + +# We have two checks in the script. +# The first one only checks the files in the verified/internal folder; the second one does for all the rest +# It was implemented like this due to not uniqueness of the names in the verified folders +# For instance, the files Hacl_Chacha20.h are present in both directories, but the content differs. 
+ +files=($(find ~/nss/lib/freebl/verified/internal -type f -name '*.[ch]')) +for f in "${files[@]}"; do + file_name=$(basename "$f") + hacl_file=($(find ~/hacl-star/dist/mozilla/internal/ -type f -name $file_name)) + diff $hacl_file $f +done + +files=($(find ~/nss/lib/freebl/verified/ -type f -name '*.[ch]' -not -path "*/freebl/verified/internal/*")) for f in "${files[@]}"; do file_name=$(basename "$f") - hacl_file=($(find ~/hacl-star/dist/mozilla/ ~/hacl-star/dist/kremlin/ -type f -name $file_name)) + hacl_file=($(find ~/hacl-star/dist/mozilla/ ~/hacl-star/dist/karamel/ -type f -name $file_name -not -path "*/hacl-star/dist/mozilla/internal/*")) diff $hacl_file $f done diff --git a/coreconf/config.mk b/coreconf/config.mk index 2ee4797bb..741bbee2e 100644 --- a/coreconf/config.mk +++ b/coreconf/config.mk @@ -131,6 +131,19 @@ endif ####################################################################### # Master "Core Components" macros for Hardware features # ####################################################################### + +ifndef NSS_DISABLE_SSE3 + NSS_DISABLE_SSE3 = 0 + ifndef CC_IS_CLANG + ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) + NSS_DISABLE_SSE3 = 1 + endif + endif + ifeq (1,$(NSS_DISABLE_SSE3)) + export NSS_DISABLE_SSE3 + endif +endif #ndef NSS_DISABLE_SSE3 + ifndef NSS_DISABLE_AVX2 ifneq ($(CPU_ARCH),x86_64) # Disable AVX2 entirely on non-Intel platforms @@ -139,14 +152,9 @@ ifndef NSS_DISABLE_AVX2 else # Clang reports its version as an older gcc, but it's OK ifndef CC_IS_CLANG - ifneq (,$(filter 0 1 2 3,$(word 1,$(GCC_VERSION)))) + ifneq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) NSS_DISABLE_AVX2 = 1 endif - ifeq (4,$(word 1,$(GCC_VERSION))) - ifeq (,$(filter 8 9,$(word 2,$(GCC_VERSION)))) - NSS_DISABLE_AVX2 = 1 - endif - endif endif ifeq (1,$(NSS_DISABLE_AVX2)) $(warning Unable to find gcc 4.8 or greater, disabling -mavx2) @@ -179,6 +187,10 @@ ifdef NSS_DISABLE_AVX2 DEFINES += -DNSS_DISABLE_AVX2 endif +ifdef NSS_DISABLE_SSE3 
+DEFINES += -DNSS_DISABLE_SSE3 +endif + ifdef NSS_DISABLE_CHACHAPOLY DEFINES += -DNSS_DISABLE_CHACHAPOLY endif diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile index a15db872f..f492d3274 100644 --- a/lib/freebl/Makefile +++ b/lib/freebl/Makefile @@ -572,18 +572,18 @@ ifneq ($(shell $(CC) -? 2>&1 >/dev/null </dev/null | sed -e 's/:.*//;1q'),lcc) HAVE_INT128_SUPPORT = 1 DEFINES += -DHAVE_INT128_SUPPORT endif - ifneq (,$(filter 0 1 2 3,$(word 1,$(GCC_VERSION)))) + ifneq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) NSS_DISABLE_AVX2 = 1 endif - ifeq (4,$(word 1,$(GCC_VERSION))) - ifeq (,$(filter 8 9,$(word 2,$(GCC_VERSION)))) - NSS_DISABLE_AVX2 = 1 - endif - endif ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) HAVE_INT128_SUPPORT = 1 DEFINES += -DHAVE_INT128_SUPPORT endif + ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) + NSS_DISABLE_SSE3 = 1 + NSS_DISABLE_SSE4_1 = 1 + NSS_DISABLE_SSE4_2 = 1 + endif endif endif # lcc endif # USE_64 @@ -596,8 +596,12 @@ ifndef NSS_DISABLE_CHACHAPOLY ifeq ($(CPU_ARCH),x86_64) ifndef NSS_DISABLE_AVX2 EXTRA_SRCS += Hacl_Poly1305_256.c Hacl_Chacha20_Vec256.c Hacl_Chacha20Poly1305_256.c + DEFINES += -DHACL_CAN_COMPILE_VEC256 endif # NSS_DISABLE_AVX2 - EXTRA_SRCS += Hacl_Poly1305_128.c Hacl_Chacha20_Vec128.c Hacl_Chacha20Poly1305_128.c + ifndef NSS_DISABLE_SSE3 + EXTRA_SRCS += Hacl_Poly1305_128.c Hacl_Chacha20_Vec128.c Hacl_Chacha20Poly1305_128.c + DEFINES += -DHACL_CAN_COMPILE_VEC128 + endif endif # x86_64 VERIFIED_SRCS += Hacl_Poly1305_32.c Hacl_Chacha20.c Hacl_Chacha20Poly1305_32.c @@ -639,7 +643,7 @@ vpath %.c mpi ecl verified deprecated vpath %.S mpi ecl vpath %.s mpi ecl vpath %.asm mpi ecl -INCLUDES += -Impi -Iecl -Iverified -Iverified/kremlin/include -Iverified/kremlin/kremlib/dist/minimal -Ideprecated +INCLUDES += -Impi -Iecl -Iverified -Iverified/internal -Iverified/karamel/include -Iverified/karamel/krmllib/dist/minimal -Ideprecated DEFINES += -DMP_API_COMPATIBLE diff --git a/lib/freebl/chacha20poly1305.c 
b/lib/freebl/chacha20poly1305.c index c442eb619..29bbc9d1c 100644 --- a/lib/freebl/chacha20poly1305.c +++ b/lib/freebl/chacha20poly1305.c @@ -207,17 +207,18 @@ ChaCha20Xor(uint8_t *output, uint8_t *block, uint32_t len, uint8_t *k, uint8_t *nonce, uint32_t ctr) { #ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + Hacl_Chacha20_Vec256_chacha20_encrypt_256(len, output, block, k, nonce, ctr); + } +#endif + +#ifndef NSS_DISABLE_SSE3 if (ssse3_support() && sse4_1_support() && avx_support()) { -#ifdef NSS_DISABLE_AVX2 Hacl_Chacha20_Vec128_chacha20_encrypt_128(len, output, block, k, nonce, ctr); -#else - if (avx2_support()) { - Hacl_Chacha20_Vec256_chacha20_encrypt_256(len, output, block, k, nonce, ctr); - } else { - Hacl_Chacha20_Vec128_chacha20_encrypt_128(len, output, block, k, nonce, ctr); - } + } #endif - } else + #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) if (ppc_crypto_support()) { @@ -280,37 +281,41 @@ ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output, } #ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + Hacl_Chacha20Poly1305_256_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, output + inputLen); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 if (ssse3_support() && sse4_1_support() && avx_support()) { -#ifdef NSS_DISABLE_AVX2 Hacl_Chacha20Poly1305_128_aead_encrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, (uint8_t *)input, output, output + inputLen); -#else - if (avx2_support()) { - Hacl_Chacha20Poly1305_256_aead_encrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, - (uint8_t *)input, output, output + inputLen); - } else { - Hacl_Chacha20Poly1305_128_aead_encrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, - (uint8_t *)input, output, output + inputLen); - } 
+ goto finish; + } #endif - } else + #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) if (ppc_crypto_support()) { Chacha20Poly1305_vsx_aead_encrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, (uint8_t *)input, output, output + inputLen); - } else + goto finish; + } #endif { Hacl_Chacha20Poly1305_32_aead_encrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, (uint8_t *)input, output, output + inputLen); + goto finish; } +finish: *outputLen = inputLen + ctx->tagLen; return SECSuccess; #endif @@ -349,37 +354,41 @@ ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output, uint32_t res = 1; #ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + res = Hacl_Chacha20Poly1305_256_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 if (ssse3_support() && sse4_1_support() && avx_support()) { -#ifdef NSS_DISABLE_AVX2 res = Hacl_Chacha20Poly1305_128_aead_decrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); -#else - if (avx2_support()) { - res = Hacl_Chacha20Poly1305_256_aead_decrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, - (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); - } else { - res = Hacl_Chacha20Poly1305_128_aead_decrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, - (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); - } + goto finish; + } #endif - } else + #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) if (ppc_crypto_support()) { res = 
Chacha20Poly1305_vsx_aead_decrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); - } else + goto finish; + } #endif { res = Hacl_Chacha20Poly1305_32_aead_decrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); + goto finish; } +finish: if (res) { PORT_SetError(SEC_ERROR_BAD_DATA); return SECFailure; @@ -420,25 +429,42 @@ ChaCha20Poly1305_Encrypt(const ChaCha20Poly1305Context *ctx, } #ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + Hacl_Chacha20Poly1305_256_aead_encrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, + (uint8_t *)input, output, outTag); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 if (ssse3_support() && sse4_1_support() && avx_support()) { Hacl_Chacha20Poly1305_128_aead_encrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, (uint8_t *)input, output, outTag); - } else + goto finish; + } +#endif + + else #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) if (ppc_crypto_support()) { Chacha20Poly1305_vsx_aead_encrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, (uint8_t *)input, output, outTag); + goto finish; } else #endif { Hacl_Chacha20Poly1305_32_aead_encrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, (uint8_t *)input, output, outTag); + goto finish; } +finish: *outputLen = inputLen; return SECSuccess; #endif @@ -477,25 +503,41 @@ ChaCha20Poly1305_Decrypt(const ChaCha20Poly1305Context *ctx, uint32_t res = 1; #ifdef NSS_X64 +#ifndef NSS_DISABLE_AVX2 + if (avx2_support()) { + res = Hacl_Chacha20Poly1305_256_aead_decrypt( + (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, + (uint8_t *)output, (uint8_t *)input, 
(uint8_t *)tagIn); + goto finish; + } +#endif + +#ifndef NSS_DISABLE_SSE3 if (ssse3_support() && sse4_1_support() && avx_support()) { res = Hacl_Chacha20Poly1305_128_aead_decrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, (uint8_t *)output, (uint8_t *)input, (uint8_t *)tagIn); - } else + goto finish; + } +#endif + #elif defined(__powerpc64__) && defined(__LITTLE_ENDIAN__) && \ !defined(NSS_DISABLE_ALTIVEC) && !defined(NSS_DISABLE_CRYPTO_VSX) if (ppc_crypto_support()) { res = Chacha20Poly1305_vsx_aead_decrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, (uint8_t *)output, (uint8_t *)input, (uint8_t *)tagIn); - } else + goto finish; + } #endif { res = Hacl_Chacha20Poly1305_32_aead_decrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, (uint8_t *)output, (uint8_t *)input, (uint8_t *)tagIn); + goto finish; } +finish: if (res) { PORT_SetError(SEC_ERROR_BAD_DATA); return SECFailure; diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp index 72e1b9b73..23940ef77 100644 --- a/lib/freebl/freebl.gyp +++ b/lib/freebl/freebl.gyp @@ -761,8 +761,9 @@ 'mpi', 'ecl', 'verified', - 'verified/kremlin/include', - 'verified/kremlin/kremlib/dist/minimal', + 'verified/internal', + 'verified/karamel/include', + 'verified/karamel/krmllib/dist/minimal', 'deprecated', ], 'defines': [ @@ -833,6 +834,13 @@ 'MP_IS_LITTLE_ENDIAN', ], }], + # Poly1305_256 requires the flag to run + ['target_arch=="x64"', { + 'defines':[ + 'HACL_CAN_COMPILE_VEC128', + 'HACL_CAN_COMPILE_VEC256', + ], + }], # MSVC has no __int128 type. Use emulated int128 and leave # have_int128_support as-is for Curve25519 impl. selection. 
[ 'have_int128_support==1 and (OS!="win" or cc_is_clang==1 or cc_is_gcc==1)', { diff --git a/lib/freebl/verified/Hacl_Bignum25519_51.h b/lib/freebl/verified/Hacl_Bignum25519_51.h index 173f11188..d53e43c21 100644 --- a/lib/freebl/verified/Hacl_Bignum25519_51.h +++ b/lib/freebl/verified/Hacl_Bignum25519_51.h @@ -28,12 +28,12 @@ extern "C" { #endif -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Kremlib.h" +#include "Hacl_Krmllib.h" static inline void Hacl_Impl_Curve25519_Field51_fadd(uint64_t *out, uint64_t *f1, uint64_t *f2) @@ -661,11 +661,13 @@ static inline void Hacl_Impl_Curve25519_Field51_cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) { uint64_t mask = (uint64_t)0U - bit; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)10U; i++) { - uint64_t dummy = mask & (p1[i] ^ p2[i]); - p1[i] = p1[i] ^ dummy; - p2[i] = p2[i] ^ dummy; - } + KRML_MAYBE_FOR10(i, + (uint32_t)0U, + (uint32_t)10U, + (uint32_t)1U, + uint64_t dummy = mask & (p1[i] ^ p2[i]); + p1[i] = p1[i] ^ dummy; + p2[i] = p2[i] ^ dummy;); } #if defined(__cplusplus) diff --git a/lib/freebl/verified/Hacl_Chacha20.c b/lib/freebl/verified/Hacl_Chacha20.c index 663daf566..d8827b3bc 100644 --- a/lib/freebl/verified/Hacl_Chacha20.c +++ b/lib/freebl/verified/Hacl_Chacha20.c @@ -21,7 +21,7 @@ * SOFTWARE. 
*/ -#include "Hacl_Chacha20.h" +#include "internal/Hacl_Chacha20.h" const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; @@ -98,69 +98,80 @@ chacha20_core(uint32_t *k, uint32_t *ctx, uint32_t ctr) uint32_t ctr_u32 = ctr; k[12U] = k[12U] + ctr_u32; rounds(k); - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - uint32_t *os = k; - uint32_t x = k[i] + ctx[i]; - os[i] = x; - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = k; + uint32_t x = k[i] + ctx[i]; + os[i] = x;); k[12U] = k[12U] + ctr_u32; } static const uint32_t chacha20_constants[4U] = { (uint32_t)0x61707865U, (uint32_t)0x3320646eU, (uint32_t)0x79622d32U, (uint32_t)0x6b206574U }; -static inline void -chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) +void +Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) { - uint32_t *uu____0 = ctx; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) { - uint32_t *os = uu____0; - uint32_t x = chacha20_constants[i]; - os[i] = x; - } - uint32_t *uu____1 = ctx + (uint32_t)4U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) { - uint32_t *os = uu____1; - uint8_t *bj = k + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x; - } + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx; + uint32_t x = chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); ctx[12U] = ctr; - uint32_t *uu____2 = ctx + (uint32_t)13U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) { - uint32_t *os = uu____2; - uint8_t *bj = n + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - 
uint32_t x = r; - os[i] = x; - } + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); } -static inline void +static void chacha20_encrypt_block(uint32_t *ctx, uint8_t *out, uint32_t incr, uint8_t *text) { uint32_t k[16U] = { 0U }; chacha20_core(k, ctx, incr); uint32_t bl[16U] = { 0U }; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - uint32_t *os = bl; - uint8_t *bj = text + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x; - } - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - uint32_t *os = bl; - uint32_t x = bl[i] ^ k[i]; - os[i] = x; - } - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - store32_le(out + i * (uint32_t)4U, bl[i]); - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = bl; + uint8_t *bj = text + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint32_t *os = bl; + uint32_t x = bl[i] ^ k[i]; + os[i] = x;); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + store32_le(out + i * (uint32_t)4U, bl[i]);); } static inline void @@ -172,8 +183,8 @@ chacha20_encrypt_last(uint32_t *ctx, uint32_t len, uint8_t *out, uint32_t incr, memcpy(out, plain, len * sizeof(uint8_t)); } -static inline void -chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text) +void +Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text) { uint32_t rem = len % (uint32_t)64U; uint32_t nb = len / (uint32_t)64U; @@ -196,8 +207,8 @@ Hacl_Chacha20_chacha20_encrypt( uint32_t ctr) { uint32_t ctx[16U] = { 0U }; - chacha20_init(ctx, key, n, ctr); - chacha20_update(ctx, len, out, text); + 
Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, text); } void @@ -210,6 +221,6 @@ Hacl_Chacha20_chacha20_decrypt( uint32_t ctr) { uint32_t ctx[16U] = { 0U }; - chacha20_init(ctx, key, n, ctr); - chacha20_update(ctx, len, out, cipher); + Hacl_Impl_Chacha20_chacha20_init(ctx, key, n, ctr); + Hacl_Impl_Chacha20_chacha20_update(ctx, len, out, cipher); } diff --git a/lib/freebl/verified/Hacl_Chacha20.h b/lib/freebl/verified/Hacl_Chacha20.h index 850544234..56f2ae064 100644 --- a/lib/freebl/verified/Hacl_Chacha20.h +++ b/lib/freebl/verified/Hacl_Chacha20.h @@ -28,14 +28,12 @@ extern "C" { #endif -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Kremlib.h" - -extern const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U]; +#include "Hacl_Krmllib.h" void Hacl_Chacha20_chacha20_encrypt( diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c index e45fcd9df..d7ee9647a 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.c @@ -23,6 +23,9 @@ #include "Hacl_Chacha20Poly1305_128.h" +#include "internal/Hacl_Poly1305_128.h" +#include "internal/Hacl_Krmllib.h" +#include "libintvector.h" static inline void poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t *text) { @@ -44,9 +47,8 @@ poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t uint32_t nb = len1 / bs; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = text1 + i * bs; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; 
Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); Lib_IntVector_Intrinsics_vec128 b2 = Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); @@ -268,9 +270,8 @@ poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t uint32_t rem1 = len1 % (uint32_t)16U; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = t10 + i * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; uint64_t u0 = load64_le(block); uint64_t lo = u0; uint64_t u = load64_le(block + (uint32_t)8U); @@ -476,9 +477,8 @@ poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t } if (rem1 > (uint32_t)0U) { uint8_t *last = t10 + nb * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; uint8_t tmp[16U] = { 0U }; memcpy(tmp, last, rem1 * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); @@ -689,9 +689,8 @@ poly1305_padded_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint32_t len, uint8_t if (r > (uint32_t)0U) { Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec128 *acc = ctx; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); @@ -907,22 +906,22 @@ poly1305_do_128( uint8_t *m, uint8_t *out) { - Lib_IntVector_Intrinsics_vec128 ctx[25U]; - for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i) - ctx[_i] = 
Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U }; uint8_t block[16U] = { 0U }; Hacl_Poly1305_128_poly1305_init(ctx, k); if (aadlen != (uint32_t)0U) { poly1305_padded_128(ctx, aadlen, aad); } - poly1305_padded_128(ctx, mlen, m); + if (mlen != (uint32_t)0U) { + poly1305_padded_128(ctx, mlen, m); + } store64_le(block, (uint64_t)aadlen); store64_le(block + (uint32_t)8U, (uint64_t)mlen); Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec128 *acc = ctx; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; uint64_t u0 = load64_le(block); uint64_t lo = u0; uint64_t u = load64_le(block + (uint32_t)8U); @@ -1163,10 +1162,12 @@ Hacl_Chacha20Poly1305_128_aead_decrypt( uint8_t *key = tmp; poly1305_do_128(key, aadlen, aad, mlen, cipher, computed_mac); uint8_t res = (uint8_t)255U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); - res = uu____0 & res; - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); uint8_t z = res; if (z == (uint8_t)255U) { Hacl_Chacha20_Vec128_chacha20_encrypt_128(mlen, m, cipher, k, n, (uint32_t)1U); diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h index bf5f198a7..01e2a4f51 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_128.h @@ -28,15 +28,14 @@ extern "C" { #endif -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include 
"krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Kremlib.h" -#include "Hacl_Chacha20_Vec128.h" #include "Hacl_Poly1305_128.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20_Vec128.h" void Hacl_Chacha20Poly1305_128_aead_encrypt( diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c index efa598ae1..a4e54f1e2 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c @@ -23,6 +23,9 @@ #include "Hacl_Chacha20Poly1305_256.h" +#include "internal/Hacl_Poly1305_256.h" +#include "internal/Hacl_Krmllib.h" +#include "libintvector.h" static inline void poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text) { @@ -44,9 +47,8 @@ poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t uint32_t nb = len1 / bs; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = text1 + i * bs; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); Lib_IntVector_Intrinsics_vec256 hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); @@ -270,9 +272,8 @@ poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t uint32_t rem1 = len1 % (uint32_t)16U; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = t10 + i * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; uint64_t u0 = load64_le(block); uint64_t lo = u0; uint64_t u = load64_le(block + (uint32_t)8U); @@ -478,9 +479,8 @@ 
poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t } if (rem1 > (uint32_t)0U) { uint8_t *last = t10 + nb * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; uint8_t tmp[16U] = { 0U }; memcpy(tmp, last, rem1 * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); @@ -691,9 +691,8 @@ poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t if (r > (uint32_t)0U) { Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec256 *acc = ctx; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; uint64_t u0 = load64_le(tmp); uint64_t lo = u0; uint64_t u = load64_le(tmp + (uint32_t)8U); @@ -909,22 +908,22 @@ poly1305_do_256( uint8_t *m, uint8_t *out) { - Lib_IntVector_Intrinsics_vec256 ctx[25U]; - for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; uint8_t block[16U] = { 0U }; Hacl_Poly1305_256_poly1305_init(ctx, k); if (aadlen != (uint32_t)0U) { poly1305_padded_256(ctx, aadlen, aad); } - poly1305_padded_256(ctx, mlen, m); + if (mlen != (uint32_t)0U) { + poly1305_padded_256(ctx, mlen, m); + } store64_le(block, (uint64_t)aadlen); store64_le(block + (uint32_t)8U, (uint64_t)mlen); Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec256 *acc = ctx; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 
0U }; uint64_t u0 = load64_le(block); uint64_t lo = u0; uint64_t u = load64_le(block + (uint32_t)8U); @@ -1165,10 +1164,12 @@ Hacl_Chacha20Poly1305_256_aead_decrypt( uint8_t *key = tmp; poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac); uint8_t res = (uint8_t)255U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); - res = uu____0 & res; - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); uint8_t z = res; if (z == (uint8_t)255U) { Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n, (uint32_t)1U); diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h index 09ebbbf3d..9a81e01f5 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h @@ -28,15 +28,14 @@ extern "C" { #endif -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Kremlib.h" -#include "Hacl_Chacha20_Vec256.h" #include "Hacl_Poly1305_256.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20_Vec256.h" void Hacl_Chacha20Poly1305_256_aead_encrypt( diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c index 493a31695..f8efb0037 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.c @@ -23,6 +23,8 @@ #include "Hacl_Chacha20Poly1305_32.h" +#include "internal/Hacl_Krmllib.h" + static inline void poly1305_padded_32(uint64_t *ctx, uint32_t len, uint8_t *text) { @@ -414,7 +416,9 @@ poly1305_do_32( if (aadlen != (uint32_t)0U) { poly1305_padded_32(ctx, aadlen, aad); } - 
poly1305_padded_32(ctx, mlen, m); + if (mlen != (uint32_t)0U) { + poly1305_padded_32(ctx, mlen, m); + } store64_le(block, (uint64_t)aadlen); store64_le(block + (uint32_t)8U, (uint64_t)mlen); uint64_t *pre = ctx + (uint32_t)5U; @@ -573,10 +577,12 @@ Hacl_Chacha20Poly1305_32_aead_decrypt( uint8_t *key = tmp; poly1305_do_32(key, aadlen, aad, mlen, cipher, computed_mac); uint8_t res = (uint8_t)255U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); - res = uu____0 & res; - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); + res = uu____0 & res;); uint8_t z = res; if (z == (uint8_t)255U) { Hacl_Chacha20_chacha20_encrypt(mlen, m, cipher, k, n, (uint32_t)1U); diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h index f7854685c..a3d23d6d3 100644 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h +++ b/lib/freebl/verified/Hacl_Chacha20Poly1305_32.h @@ -28,14 +28,14 @@ extern "C" { #endif -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Chacha20.h" -#include "Hacl_Kremlib.h" #include "Hacl_Poly1305_32.h" +#include "Hacl_Krmllib.h" +#include "Hacl_Chacha20.h" void Hacl_Chacha20Poly1305_32_aead_encrypt( diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec128.c b/lib/freebl/verified/Hacl_Chacha20_Vec128.c index 485c78d34..697a36bb3 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec128.c +++ b/lib/freebl/verified/Hacl_Chacha20_Vec128.c @@ -23,6 +23,8 @@ #include "Hacl_Chacha20_Vec128.h" +#include "internal/Hacl_Chacha20.h" +#include "libintvector.h" static inline void double_round_128(Lib_IntVector_Intrinsics_vec128 *st) { @@ -144,11 +146,13 @@ chacha20_core_128( 
double_round_128(k); double_round_128(k); double_round_128(k); - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec128 *os = k; - Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]); - os[i] = x; - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 *os = k; + Lib_IntVector_Intrinsics_vec128 x = Lib_IntVector_Intrinsics_vec128_add32(k[i], ctx[i]); + os[i] = x;); k[12U] = Lib_IntVector_Intrinsics_vec128_add32(k[12U], cv); } @@ -156,37 +160,42 @@ static inline void chacha20_init_128(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) { uint32_t ctx1[16U] = { 0U }; - uint32_t *uu____0 = ctx1; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) { - uint32_t *os = uu____0; - uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; - os[i] = x; - } - uint32_t *uu____1 = ctx1 + (uint32_t)4U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) { - uint32_t *os = uu____1; - uint8_t *bj = k + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x; - } + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx1; + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); ctx1[12U] = ctr; - uint32_t *uu____2 = ctx1 + (uint32_t)13U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) { - uint32_t *os = uu____2; - uint8_t *bj = n + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x; - } - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec128 *os = ctx; - uint32_t x = ctx1[i]; - Lib_IntVector_Intrinsics_vec128 x0 = 
Lib_IntVector_Intrinsics_vec128_load32(x); - os[i] = x0; - } + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 *os = ctx; + uint32_t x = ctx1[i]; + Lib_IntVector_Intrinsics_vec128 x0 = Lib_IntVector_Intrinsics_vec128_load32(x); + os[i] = x0;); Lib_IntVector_Intrinsics_vec128 ctr1 = Lib_IntVector_Intrinsics_vec128_load32s((uint32_t)0U, @@ -206,9 +215,8 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( uint8_t *n, uint32_t ctr) { - Lib_IntVector_Intrinsics_vec128 ctx[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U }; chacha20_init_128(ctx, key, n, ctr); uint32_t rem = len % (uint32_t)256U; uint32_t nb = len / (uint32_t)256U; @@ -216,22 +224,33 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *uu____0 = out + i * (uint32_t)256U; uint8_t *uu____1 = text + i * (uint32_t)256U; - Lib_IntVector_Intrinsics_vec128 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; chacha20_core_128(k, ctx, i); - Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec128 - v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v2_ = 
Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec128 - v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); Lib_IntVector_Intrinsics_vec128 v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); Lib_IntVector_Intrinsics_vec128 @@ -240,82 +259,86 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); Lib_IntVector_Intrinsics_vec128 v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec128 v0 = v0__; - Lib_IntVector_Intrinsics_vec128 v1 = v1__; - Lib_IntVector_Intrinsics_vec128 v2 = v2__; - Lib_IntVector_Intrinsics_vec128 v3 = v3__; - Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; - Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; - 
Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; - Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; Lib_IntVector_Intrinsics_vec128 - v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec128 - v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec128 v4 = v0__0; - Lib_IntVector_Intrinsics_vec128 v5 = v1__0; - Lib_IntVector_Intrinsics_vec128 v6 = v2__0; - Lib_IntVector_Intrinsics_vec128 v7 = v3__0; - 
Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; - Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; - Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; - Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; Lib_IntVector_Intrinsics_vec128 - v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311); + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); Lib_IntVector_Intrinsics_vec128 - v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); - 
Lib_IntVector_Intrinsics_vec128 v8 = v0__1; - Lib_IntVector_Intrinsics_vec128 v9 = v1__1; - Lib_IntVector_Intrinsics_vec128 v10 = v2__1; - Lib_IntVector_Intrinsics_vec128 v11 = v3__1; - Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; - Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; - Lib_IntVector_Intrinsics_vec128 v21 = k[14U]; - Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; Lib_IntVector_Intrinsics_vec128 - v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + v2__5 = 
Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); Lib_IntVector_Intrinsics_vec128 - v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); - Lib_IntVector_Intrinsics_vec128 v12 = v0__2; - Lib_IntVector_Intrinsics_vec128 v13 = v1__2; - Lib_IntVector_Intrinsics_vec128 v14 = v2__2; - Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; k[0U] = v0; k[1U] = v4; k[2U] = v8; @@ -332,34 +355,47 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( k[13U] = v7; k[14U] = v11; k[15U] = v15; - for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { - Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); - Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y); - } + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y);); } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)256U; uint8_t *uu____3 = text + nb * (uint32_t)256U; uint8_t plain[256U] = { 0U }; memcpy(plain, uu____3, rem * sizeof(uint8_t)); - Lib_IntVector_Intrinsics_vec128 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + 
KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; chacha20_core_128(k, ctx, nb); - Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec128 - v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec128 - v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); Lib_IntVector_Intrinsics_vec128 v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); 
Lib_IntVector_Intrinsics_vec128 @@ -368,82 +404,86 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); Lib_IntVector_Intrinsics_vec128 v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec128 v0 = v0__; - Lib_IntVector_Intrinsics_vec128 v1 = v1__; - Lib_IntVector_Intrinsics_vec128 v2 = v2__; - Lib_IntVector_Intrinsics_vec128 v3 = v3__; - Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; - Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; - Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; - Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; Lib_IntVector_Intrinsics_vec128 - v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + v1__1 = 
Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec128 - v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec128 v4 = v0__0; - Lib_IntVector_Intrinsics_vec128 v5 = v1__0; - Lib_IntVector_Intrinsics_vec128 v6 = v2__0; - Lib_IntVector_Intrinsics_vec128 v7 = v3__0; - Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; - Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; - Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; - Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; Lib_IntVector_Intrinsics_vec128 - v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311); + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + v0__3 = 
Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); Lib_IntVector_Intrinsics_vec128 - v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); - Lib_IntVector_Intrinsics_vec128 v8 = v0__1; - Lib_IntVector_Intrinsics_vec128 v9 = v1__1; - Lib_IntVector_Intrinsics_vec128 v10 = v2__1; - Lib_IntVector_Intrinsics_vec128 v11 = v3__1; - Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; - Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; - Lib_IntVector_Intrinsics_vec128 v21 = k[14U]; - Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; Lib_IntVector_Intrinsics_vec128 - v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + v3_2 = 
Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); Lib_IntVector_Intrinsics_vec128 - v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); - Lib_IntVector_Intrinsics_vec128 v12 = v0__2; - Lib_IntVector_Intrinsics_vec128 v13 = v1__2; - Lib_IntVector_Intrinsics_vec128 v14 = v2__2; - Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; k[0U] = v0; k[1U] = v4; k[2U] = v8; @@ -460,12 +500,14 @@ Hacl_Chacha20_Vec128_chacha20_encrypt_128( k[13U] = v7; k[14U] = v11; k[15U] = v15; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); - Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y); - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); 
+ Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y);); memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } @@ -479,9 +521,8 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( uint8_t *n, uint32_t ctr) { - Lib_IntVector_Intrinsics_vec128 ctx[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[16U] KRML_POST_ALIGN(16) = { 0U }; chacha20_init_128(ctx, key, n, ctr); uint32_t rem = len % (uint32_t)256U; uint32_t nb = len / (uint32_t)256U; @@ -489,22 +530,33 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *uu____0 = out + i * (uint32_t)256U; uint8_t *uu____1 = cipher + i * (uint32_t)256U; - Lib_IntVector_Intrinsics_vec128 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; chacha20_core_128(k, ctx, i); - Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec128 - v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec128 - v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = 
k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); Lib_IntVector_Intrinsics_vec128 v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); Lib_IntVector_Intrinsics_vec128 @@ -513,82 +565,86 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); Lib_IntVector_Intrinsics_vec128 v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec128 v0 = v0__; - Lib_IntVector_Intrinsics_vec128 v1 = v1__; - Lib_IntVector_Intrinsics_vec128 v2 = v2__; - Lib_IntVector_Intrinsics_vec128 v3 = v3__; - Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; - Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; - Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; - Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 
v3 = v3__0; Lib_IntVector_Intrinsics_vec128 - v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec128 - v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec128 v4 = v0__0; - Lib_IntVector_Intrinsics_vec128 v5 = v1__0; - Lib_IntVector_Intrinsics_vec128 v6 = v2__0; - Lib_IntVector_Intrinsics_vec128 v7 = v3__0; - Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; - Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; - Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; - Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + 
Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; Lib_IntVector_Intrinsics_vec128 - v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311); + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); Lib_IntVector_Intrinsics_vec128 - v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); - Lib_IntVector_Intrinsics_vec128 v8 = v0__1; - Lib_IntVector_Intrinsics_vec128 v9 = v1__1; - Lib_IntVector_Intrinsics_vec128 v10 = v2__1; - Lib_IntVector_Intrinsics_vec128 v11 = v3__1; - Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; - Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; - Lib_IntVector_Intrinsics_vec128 v21 = k[14U]; - Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 
v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; Lib_IntVector_Intrinsics_vec128 - v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v2__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); Lib_IntVector_Intrinsics_vec128 - v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); - Lib_IntVector_Intrinsics_vec128 v12 = v0__2; - Lib_IntVector_Intrinsics_vec128 v13 = v1__2; - Lib_IntVector_Intrinsics_vec128 v14 = v2__2; - Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + 
Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; k[0U] = v0; k[1U] = v4; k[2U] = v8; @@ -605,34 +661,47 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( k[13U] = v7; k[14U] = v11; k[15U] = v15; - for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { - Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); - Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y); - } + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(uu____1 + i0 * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec128_store32_le(uu____0 + i0 * (uint32_t)16U, y);); } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)256U; uint8_t *uu____3 = cipher + nb * (uint32_t)256U; uint8_t plain[256U] = { 0U }; memcpy(plain, uu____3, rem * sizeof(uint8_t)); - Lib_IntVector_Intrinsics_vec128 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 k[16U] KRML_POST_ALIGN(16) = { 0U }; chacha20_core_128(k, ctx, nb); - Lib_IntVector_Intrinsics_vec128 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec128 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec128 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec128 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec128 - v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v1_ = 
Lib_IntVector_Intrinsics_vec128_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec128 - v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec128 - v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(v20, v30); + Lib_IntVector_Intrinsics_vec128 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec128 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec128 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec128 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec128 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec128 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec128 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec128 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec128 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec128 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec128 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec128 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec128 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec128 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec128 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec128 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec128 + v0_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v1_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st0, st1); + Lib_IntVector_Intrinsics_vec128 + v2_ = Lib_IntVector_Intrinsics_vec128_interleave_low32(st2, st3); + Lib_IntVector_Intrinsics_vec128 + v3_ = Lib_IntVector_Intrinsics_vec128_interleave_high32(st2, st3); Lib_IntVector_Intrinsics_vec128 v0__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_, v2_); Lib_IntVector_Intrinsics_vec128 @@ -641,82 +710,86 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( v2__ = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_, v3_); Lib_IntVector_Intrinsics_vec128 v3__ = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec128 v0 = v0__; - Lib_IntVector_Intrinsics_vec128 v1 = v1__; - Lib_IntVector_Intrinsics_vec128 v2 = v2__; - Lib_IntVector_Intrinsics_vec128 v3 = v3__; - 
Lib_IntVector_Intrinsics_vec128 v010 = k[4U]; - Lib_IntVector_Intrinsics_vec128 v110 = k[5U]; - Lib_IntVector_Intrinsics_vec128 v210 = k[6U]; - Lib_IntVector_Intrinsics_vec128 v310 = k[7U]; + Lib_IntVector_Intrinsics_vec128 v0__0 = v0__; + Lib_IntVector_Intrinsics_vec128 v2__0 = v2__; + Lib_IntVector_Intrinsics_vec128 v1__0 = v1__; + Lib_IntVector_Intrinsics_vec128 v3__0 = v3__; + Lib_IntVector_Intrinsics_vec128 v0 = v0__0; + Lib_IntVector_Intrinsics_vec128 v1 = v1__0; + Lib_IntVector_Intrinsics_vec128 v2 = v2__0; + Lib_IntVector_Intrinsics_vec128 v3 = v3__0; Lib_IntVector_Intrinsics_vec128 - v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v010, v110); + v0_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v010, v110); + v1_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st4, st5); Lib_IntVector_Intrinsics_vec128 - v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v210, v310); + v2_0 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v210, v310); + v3_0 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st6, st7); Lib_IntVector_Intrinsics_vec128 - v0__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); + v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v1__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); + v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec128 - v2__0 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); + v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec128 - v3__0 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec128 v4 = v0__0; - Lib_IntVector_Intrinsics_vec128 v5 = v1__0; - 
Lib_IntVector_Intrinsics_vec128 v6 = v2__0; - Lib_IntVector_Intrinsics_vec128 v7 = v3__0; - Lib_IntVector_Intrinsics_vec128 v011 = k[8U]; - Lib_IntVector_Intrinsics_vec128 v111 = k[9U]; - Lib_IntVector_Intrinsics_vec128 v211 = k[10U]; - Lib_IntVector_Intrinsics_vec128 v311 = k[11U]; + v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_0, v3_0); + Lib_IntVector_Intrinsics_vec128 v0__2 = v0__1; + Lib_IntVector_Intrinsics_vec128 v2__2 = v2__1; + Lib_IntVector_Intrinsics_vec128 v1__2 = v1__1; + Lib_IntVector_Intrinsics_vec128 v3__2 = v3__1; + Lib_IntVector_Intrinsics_vec128 v4 = v0__2; + Lib_IntVector_Intrinsics_vec128 v5 = v1__2; + Lib_IntVector_Intrinsics_vec128 v6 = v2__2; + Lib_IntVector_Intrinsics_vec128 v7 = v3__2; Lib_IntVector_Intrinsics_vec128 - v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v011, v111); + v0_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v011, v111); + v1_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st8, st9); Lib_IntVector_Intrinsics_vec128 - v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v211, v311); + v2_1 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v211, v311); + v3_1 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st10, st11); Lib_IntVector_Intrinsics_vec128 - v0__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); + v0__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v1__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); + v1__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_1, v2_1); Lib_IntVector_Intrinsics_vec128 - v2__1 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); + v2__3 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_1, v3_1); Lib_IntVector_Intrinsics_vec128 - 
v3__1 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); - Lib_IntVector_Intrinsics_vec128 v8 = v0__1; - Lib_IntVector_Intrinsics_vec128 v9 = v1__1; - Lib_IntVector_Intrinsics_vec128 v10 = v2__1; - Lib_IntVector_Intrinsics_vec128 v11 = v3__1; - Lib_IntVector_Intrinsics_vec128 v01 = k[12U]; - Lib_IntVector_Intrinsics_vec128 v120 = k[13U]; - Lib_IntVector_Intrinsics_vec128 v21 = k[14U]; - Lib_IntVector_Intrinsics_vec128 v31 = k[15U]; + v3__3 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_1, v3_1); + Lib_IntVector_Intrinsics_vec128 v0__4 = v0__3; + Lib_IntVector_Intrinsics_vec128 v2__4 = v2__3; + Lib_IntVector_Intrinsics_vec128 v1__4 = v1__3; + Lib_IntVector_Intrinsics_vec128 v3__4 = v3__3; + Lib_IntVector_Intrinsics_vec128 v8 = v0__4; + Lib_IntVector_Intrinsics_vec128 v9 = v1__4; + Lib_IntVector_Intrinsics_vec128 v10 = v2__4; + Lib_IntVector_Intrinsics_vec128 v11 = v3__4; Lib_IntVector_Intrinsics_vec128 - v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v01, v120); + v0_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v01, v120); + v1_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st12, st13); Lib_IntVector_Intrinsics_vec128 - v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(v21, v31); + v2_2 = Lib_IntVector_Intrinsics_vec128_interleave_low32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(v21, v31); + v3_2 = Lib_IntVector_Intrinsics_vec128_interleave_high32(st14, st15); Lib_IntVector_Intrinsics_vec128 - v0__2 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); + v0__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v1__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); + v1__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v0_2, v2_2); Lib_IntVector_Intrinsics_vec128 - v2__2 = 
Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); + v2__5 = Lib_IntVector_Intrinsics_vec128_interleave_low64(v1_2, v3_2); Lib_IntVector_Intrinsics_vec128 - v3__2 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); - Lib_IntVector_Intrinsics_vec128 v12 = v0__2; - Lib_IntVector_Intrinsics_vec128 v13 = v1__2; - Lib_IntVector_Intrinsics_vec128 v14 = v2__2; - Lib_IntVector_Intrinsics_vec128 v15 = v3__2; + v3__5 = Lib_IntVector_Intrinsics_vec128_interleave_high64(v1_2, v3_2); + Lib_IntVector_Intrinsics_vec128 v0__6 = v0__5; + Lib_IntVector_Intrinsics_vec128 v2__6 = v2__5; + Lib_IntVector_Intrinsics_vec128 v1__6 = v1__5; + Lib_IntVector_Intrinsics_vec128 v3__6 = v3__5; + Lib_IntVector_Intrinsics_vec128 v12 = v0__6; + Lib_IntVector_Intrinsics_vec128 v13 = v1__6; + Lib_IntVector_Intrinsics_vec128 v14 = v2__6; + Lib_IntVector_Intrinsics_vec128 v15 = v3__6; k[0U] = v0; k[1U] = v4; k[2U] = v8; @@ -733,12 +806,14 @@ Hacl_Chacha20_Vec128_chacha20_decrypt_128( k[13U] = v7; k[14U] = v11; k[15U] = v15; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec128 - x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); - Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y); - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec128 + x = Lib_IntVector_Intrinsics_vec128_load32_le(plain + i * (uint32_t)16U); + Lib_IntVector_Intrinsics_vec128 y = Lib_IntVector_Intrinsics_vec128_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec128_store32_le(plain + i * (uint32_t)16U, y);); memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec128.h b/lib/freebl/verified/Hacl_Chacha20_Vec128.h index 6b3a8e08b..52b8d249f 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec128.h +++ b/lib/freebl/verified/Hacl_Chacha20_Vec128.h @@ -28,14 
+28,12 @@ extern "C" { #endif -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Chacha20.h" -#include "Hacl_Kremlib.h" +#include "Hacl_Krmllib.h" void Hacl_Chacha20_Vec128_chacha20_encrypt_128( diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec256.c b/lib/freebl/verified/Hacl_Chacha20_Vec256.c index 72b235123..6c3e8488e 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec256.c +++ b/lib/freebl/verified/Hacl_Chacha20_Vec256.c @@ -23,6 +23,8 @@ #include "Hacl_Chacha20_Vec256.h" +#include "internal/Hacl_Chacha20.h" +#include "libintvector.h" static inline void double_round_256(Lib_IntVector_Intrinsics_vec256 *st) { @@ -144,11 +146,13 @@ chacha20_core_256( double_round_256(k); double_round_256(k); double_round_256(k); - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 *os = k; - Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]); - os[i] = x; - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 *os = k; + Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]); + os[i] = x;); k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); } @@ -156,37 +160,42 @@ static inline void chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n, uint32_t ctr) { uint32_t ctx1[16U] = { 0U }; - uint32_t *uu____0 = ctx1; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) { - uint32_t *os = uu____0; - uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; - os[i] = x; - } - uint32_t *uu____1 = ctx1 + (uint32_t)4U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) { - uint32_t *os = uu____1; - uint8_t *bj = k + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t 
r = u; - uint32_t x = r; - os[i] = x; - } + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint32_t *os = ctx1; + uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; + os[i] = x;); + KRML_MAYBE_FOR8(i, + (uint32_t)0U, + (uint32_t)8U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)4U; + uint8_t *bj = k + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); ctx1[12U] = ctr; - uint32_t *uu____2 = ctx1 + (uint32_t)13U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) { - uint32_t *os = uu____2; - uint8_t *bj = n + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x; - } - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 *os = ctx; - uint32_t x = ctx1[i]; - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x); - os[i] = x0; - } + KRML_MAYBE_FOR3(i, + (uint32_t)0U, + (uint32_t)3U, + (uint32_t)1U, + uint32_t *os = ctx1 + (uint32_t)13U; + uint8_t *bj = n + i * (uint32_t)4U; + uint32_t u = load32_le(bj); + uint32_t r = u; + uint32_t x = r; + os[i] = x;); + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 *os = ctx; + uint32_t x = ctx1[i]; + Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x); + os[i] = x0;); Lib_IntVector_Intrinsics_vec256 ctr1 = Lib_IntVector_Intrinsics_vec256_load32s((uint32_t)0U, @@ -210,9 +219,8 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( uint8_t *n, uint32_t ctr) { - Lib_IntVector_Intrinsics_vec256 ctx[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U }; chacha20_init_256(ctx, key, n, ctr); uint32_t rem = len % (uint32_t)512U; uint32_t nb = len / (uint32_t)512U; @@ -220,18 +228,33 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( for 
(uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *uu____0 = out + i * (uint32_t)512U; uint8_t *uu____1 = text + i * (uint32_t)512U; - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; chacha20_core_256(k, ctx, i); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; Lib_IntVector_Intrinsics_vec256 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); 
Lib_IntVector_Intrinsics_vec256 @@ -248,110 +271,174 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); Lib_IntVector_Intrinsics_vec256 v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - 
Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; - Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); 
Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); 
Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + 
Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v6___0 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + 
Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; k[0U] = v0; k[1U] = v8; k[2U] = v1; @@ -368,30 +455,47 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( k[13U] = v14; k[14U] = v7; k[15U] = v15; - for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y); - } + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + 
(uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y);); } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)512U; uint8_t *uu____3 = text + nb * (uint32_t)512U; uint8_t plain[512U] = { 0U }; memcpy(plain, uu____3, rem * sizeof(uint8_t)); - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; chacha20_core_256(k, ctx, nb); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = 
st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; Lib_IntVector_Intrinsics_vec256 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); Lib_IntVector_Intrinsics_vec256 @@ -408,110 +512,174 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); Lib_IntVector_Intrinsics_vec256 v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; - Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v4_0 = 
Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v5__0 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + 
Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); + v2_11 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; k[0U] = v0; k[1U] = v8; k[2U] = v1; @@ -528,12 +696,14 @@ Hacl_Chacha20_Vec256_chacha20_encrypt_256( k[13U] = v14; k[14U] = v7; k[15U] 
= v15; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y); - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); + Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y);); memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } @@ -547,9 +717,8 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( uint8_t *n, uint32_t ctr) { - Lib_IntVector_Intrinsics_vec256 ctx[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[16U] KRML_POST_ALIGN(32) = { 0U }; chacha20_init_256(ctx, key, n, ctr); uint32_t rem = len % (uint32_t)512U; uint32_t nb = len / (uint32_t)512U; @@ -557,18 +726,33 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *uu____0 = out + i * (uint32_t)512U; uint8_t *uu____1 = cipher + i * (uint32_t)512U; - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; chacha20_core_256(k, ctx, i); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - 
Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; Lib_IntVector_Intrinsics_vec256 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); Lib_IntVector_Intrinsics_vec256 @@ -585,110 +769,174 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); Lib_IntVector_Intrinsics_vec256 v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); 
- Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; 
- Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + v3_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); 
Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); + v2_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); + v7_2 = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + 
Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); Lib_IntVector_Intrinsics_vec256 - v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = 
Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + 
Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; k[0U] = v0; k[1U] = v8; k[2U] = v1; @@ -705,30 +953,47 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( k[13U] = v14; k[14U] = v7; k[15U] = v15; - for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y); - } + KRML_MAYBE_FOR16(i0, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(uu____1 + i0 * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); + Lib_IntVector_Intrinsics_vec256_store32_le(uu____0 + i0 * (uint32_t)32U, y);); } if (rem1 > (uint32_t)0U) { uint8_t *uu____2 = out + nb * (uint32_t)512U; uint8_t *uu____3 = cipher + nb * (uint32_t)512U; uint8_t plain[512U] = { 0U }; memcpy(plain, uu____3, rem * sizeof(uint8_t)); - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + 
Lib_IntVector_Intrinsics_vec256 k[16U] KRML_POST_ALIGN(32) = { 0U }; chacha20_core_256(k, ctx, nb); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st0 = k[0U]; + Lib_IntVector_Intrinsics_vec256 st1 = k[1U]; + Lib_IntVector_Intrinsics_vec256 st2 = k[2U]; + Lib_IntVector_Intrinsics_vec256 st3 = k[3U]; + Lib_IntVector_Intrinsics_vec256 st4 = k[4U]; + Lib_IntVector_Intrinsics_vec256 st5 = k[5U]; + Lib_IntVector_Intrinsics_vec256 st6 = k[6U]; + Lib_IntVector_Intrinsics_vec256 st7 = k[7U]; + Lib_IntVector_Intrinsics_vec256 st8 = k[8U]; + Lib_IntVector_Intrinsics_vec256 st9 = k[9U]; + Lib_IntVector_Intrinsics_vec256 st10 = k[10U]; + Lib_IntVector_Intrinsics_vec256 st11 = k[11U]; + Lib_IntVector_Intrinsics_vec256 st12 = k[12U]; + Lib_IntVector_Intrinsics_vec256 st13 = k[13U]; + Lib_IntVector_Intrinsics_vec256 st14 = k[14U]; + Lib_IntVector_Intrinsics_vec256 st15 = k[15U]; + Lib_IntVector_Intrinsics_vec256 v00 = st0; + Lib_IntVector_Intrinsics_vec256 v16 = st1; + Lib_IntVector_Intrinsics_vec256 v20 = st2; + Lib_IntVector_Intrinsics_vec256 v30 = st3; + Lib_IntVector_Intrinsics_vec256 v40 = st4; + Lib_IntVector_Intrinsics_vec256 v50 = st5; + Lib_IntVector_Intrinsics_vec256 v60 = st6; + Lib_IntVector_Intrinsics_vec256 v70 = st7; Lib_IntVector_Intrinsics_vec256 v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); Lib_IntVector_Intrinsics_vec256 @@ -745,110 +1010,174 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); Lib_IntVector_Intrinsics_vec256 v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); + Lib_IntVector_Intrinsics_vec256 
v0_0 = v0_; + Lib_IntVector_Intrinsics_vec256 v1_0 = v1_; + Lib_IntVector_Intrinsics_vec256 v2_0 = v2_; + Lib_IntVector_Intrinsics_vec256 v3_0 = v3_; + Lib_IntVector_Intrinsics_vec256 v4_0 = v4_; + Lib_IntVector_Intrinsics_vec256 v5_0 = v5_; + Lib_IntVector_Intrinsics_vec256 v6_0 = v6_; + Lib_IntVector_Intrinsics_vec256 v7_0 = v7_; Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = 
Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; - Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + v0_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + v2_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + v1_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); + v3_1 = 
Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); + v4_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); + v6_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); + v5_1 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); + v7_1 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + Lib_IntVector_Intrinsics_vec256 v0_10 = v0_1; + Lib_IntVector_Intrinsics_vec256 v1_10 = v1_1; + Lib_IntVector_Intrinsics_vec256 v2_10 = v2_1; + Lib_IntVector_Intrinsics_vec256 v3_10 = v3_1; + Lib_IntVector_Intrinsics_vec256 v4_10 = v4_1; + Lib_IntVector_Intrinsics_vec256 v5_10 = v5_1; + Lib_IntVector_Intrinsics_vec256 v6_10 = v6_1; + Lib_IntVector_Intrinsics_vec256 v7_10 = v7_1; Lib_IntVector_Intrinsics_vec256 - v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); + v0_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); + v4_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_10, v4_10); Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); + v1_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); + v5_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_10, v5_10); Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); + v2_2 = 
Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_10, v6_10); Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); + v6_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_10, v6_10); Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); + v3_2 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_10, v7_10); Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); + v7_2 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_10, v7_10); + Lib_IntVector_Intrinsics_vec256 v0_20 = v0_2; + Lib_IntVector_Intrinsics_vec256 v1_20 = v1_2; + Lib_IntVector_Intrinsics_vec256 v2_20 = v2_2; + Lib_IntVector_Intrinsics_vec256 v3_20 = v3_2; + Lib_IntVector_Intrinsics_vec256 v4_20 = v4_2; + Lib_IntVector_Intrinsics_vec256 v5_20 = v5_2; + Lib_IntVector_Intrinsics_vec256 v6_20 = v6_2; + Lib_IntVector_Intrinsics_vec256 v7_20 = v7_2; + Lib_IntVector_Intrinsics_vec256 v0_3 = v0_20; + Lib_IntVector_Intrinsics_vec256 v1_3 = v1_20; + Lib_IntVector_Intrinsics_vec256 v2_3 = v2_20; + Lib_IntVector_Intrinsics_vec256 v3_3 = v3_20; + Lib_IntVector_Intrinsics_vec256 v4_3 = v4_20; + Lib_IntVector_Intrinsics_vec256 v5_3 = v5_20; + Lib_IntVector_Intrinsics_vec256 v6_3 = v6_20; + Lib_IntVector_Intrinsics_vec256 v7_3 = v7_20; + Lib_IntVector_Intrinsics_vec256 v0 = v0_3; + Lib_IntVector_Intrinsics_vec256 v1 = v2_3; + Lib_IntVector_Intrinsics_vec256 v2 = v1_3; + Lib_IntVector_Intrinsics_vec256 v3 = v3_3; + Lib_IntVector_Intrinsics_vec256 v4 = v4_3; + Lib_IntVector_Intrinsics_vec256 v5 = v6_3; + Lib_IntVector_Intrinsics_vec256 v6 = v5_3; + Lib_IntVector_Intrinsics_vec256 v7 = v7_3; + Lib_IntVector_Intrinsics_vec256 v01 = st8; + Lib_IntVector_Intrinsics_vec256 v110 = st9; + Lib_IntVector_Intrinsics_vec256 v21 = st10; + Lib_IntVector_Intrinsics_vec256 v31 = st11; + Lib_IntVector_Intrinsics_vec256 v41 = st12; + 
Lib_IntVector_Intrinsics_vec256 v51 = st13; + Lib_IntVector_Intrinsics_vec256 v61 = st14; + Lib_IntVector_Intrinsics_vec256 v71 = st15; + Lib_IntVector_Intrinsics_vec256 + v0_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v1_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); + Lib_IntVector_Intrinsics_vec256 + v2_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v3_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); + Lib_IntVector_Intrinsics_vec256 + v4_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v5_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); + Lib_IntVector_Intrinsics_vec256 + v6_4 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); + Lib_IntVector_Intrinsics_vec256 + v7_4 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); + Lib_IntVector_Intrinsics_vec256 v0_5 = v0_4; + Lib_IntVector_Intrinsics_vec256 v1_5 = v1_4; + Lib_IntVector_Intrinsics_vec256 v2_5 = v2_4; + Lib_IntVector_Intrinsics_vec256 v3_5 = v3_4; + Lib_IntVector_Intrinsics_vec256 v4_5 = v4_4; + Lib_IntVector_Intrinsics_vec256 v5_5 = v5_4; + Lib_IntVector_Intrinsics_vec256 v6_5 = v6_4; + Lib_IntVector_Intrinsics_vec256 v7_5 = v7_4; + Lib_IntVector_Intrinsics_vec256 + v0_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); + v2_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_5, v2_5); Lib_IntVector_Intrinsics_vec256 - v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); + v1_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_5, v3_5); Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 
v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; + v3_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_5, v3_5); + Lib_IntVector_Intrinsics_vec256 + v4_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v6_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_5, v6_5); + Lib_IntVector_Intrinsics_vec256 + v5_11 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 + v7_11 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_5, v7_5); + Lib_IntVector_Intrinsics_vec256 v0_12 = v0_11; + Lib_IntVector_Intrinsics_vec256 v1_12 = v1_11; + Lib_IntVector_Intrinsics_vec256 v2_12 = v2_11; + Lib_IntVector_Intrinsics_vec256 v3_12 = v3_11; + Lib_IntVector_Intrinsics_vec256 v4_12 = v4_11; + Lib_IntVector_Intrinsics_vec256 v5_12 = v5_11; + Lib_IntVector_Intrinsics_vec256 v6_12 = v6_11; + Lib_IntVector_Intrinsics_vec256 v7_12 = v7_11; + Lib_IntVector_Intrinsics_vec256 + v0_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v4_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0_12, v4_12); + Lib_IntVector_Intrinsics_vec256 + v1_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v5_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1_12, v5_12); + Lib_IntVector_Intrinsics_vec256 + v2_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v6_21 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2_12, v6_12); + Lib_IntVector_Intrinsics_vec256 + v3_21 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 + v7_21 
= Lib_IntVector_Intrinsics_vec256_interleave_high128(v3_12, v7_12); + Lib_IntVector_Intrinsics_vec256 v0_22 = v0_21; + Lib_IntVector_Intrinsics_vec256 v1_22 = v1_21; + Lib_IntVector_Intrinsics_vec256 v2_22 = v2_21; + Lib_IntVector_Intrinsics_vec256 v3_22 = v3_21; + Lib_IntVector_Intrinsics_vec256 v4_22 = v4_21; + Lib_IntVector_Intrinsics_vec256 v5_22 = v5_21; + Lib_IntVector_Intrinsics_vec256 v6_22 = v6_21; + Lib_IntVector_Intrinsics_vec256 v7_22 = v7_21; + Lib_IntVector_Intrinsics_vec256 v0_6 = v0_22; + Lib_IntVector_Intrinsics_vec256 v1_6 = v1_22; + Lib_IntVector_Intrinsics_vec256 v2_6 = v2_22; + Lib_IntVector_Intrinsics_vec256 v3_6 = v3_22; + Lib_IntVector_Intrinsics_vec256 v4_6 = v4_22; + Lib_IntVector_Intrinsics_vec256 v5_6 = v5_22; + Lib_IntVector_Intrinsics_vec256 v6_6 = v6_22; + Lib_IntVector_Intrinsics_vec256 v7_6 = v7_22; + Lib_IntVector_Intrinsics_vec256 v8 = v0_6; + Lib_IntVector_Intrinsics_vec256 v9 = v2_6; + Lib_IntVector_Intrinsics_vec256 v10 = v1_6; + Lib_IntVector_Intrinsics_vec256 v11 = v3_6; + Lib_IntVector_Intrinsics_vec256 v12 = v4_6; + Lib_IntVector_Intrinsics_vec256 v13 = v6_6; + Lib_IntVector_Intrinsics_vec256 v14 = v5_6; + Lib_IntVector_Intrinsics_vec256 v15 = v7_6; k[0U] = v0; k[1U] = v8; k[2U] = v1; @@ -865,12 +1194,14 @@ Hacl_Chacha20_Vec256_chacha20_decrypt_256( k[13U] = v14; k[14U] = v7; k[15U] = v15; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y); - } + KRML_MAYBE_FOR16(i, + (uint32_t)0U, + (uint32_t)16U, + (uint32_t)1U, + Lib_IntVector_Intrinsics_vec256 + x = Lib_IntVector_Intrinsics_vec256_load32_le(plain + i * (uint32_t)32U); + Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); + 
Lib_IntVector_Intrinsics_vec256_store32_le(plain + i * (uint32_t)32U, y);); memcpy(uu____2, plain, rem * sizeof(uint8_t)); } } diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec256.h b/lib/freebl/verified/Hacl_Chacha20_Vec256.h index 478f2813f..5e9dccb9e 100644 --- a/lib/freebl/verified/Hacl_Chacha20_Vec256.h +++ b/lib/freebl/verified/Hacl_Chacha20_Vec256.h @@ -28,14 +28,12 @@ extern "C" { #endif -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Chacha20.h" -#include "Hacl_Kremlib.h" +#include "Hacl_Krmllib.h" void Hacl_Chacha20_Vec256_chacha20_encrypt_256( diff --git a/lib/freebl/verified/Hacl_Curve25519_51.c b/lib/freebl/verified/Hacl_Curve25519_51.c index d366d5ced..09bfc4be8 100644 --- a/lib/freebl/verified/Hacl_Curve25519_51.c +++ b/lib/freebl/verified/Hacl_Curve25519_51.c @@ -21,7 +21,9 @@ * SOFTWARE. 
*/ -#include "Hacl_Curve25519_51.h" +#include "internal/Hacl_Curve25519_51.h" + +#include "internal/Hacl_Krmllib.h" static const uint8_t g25519[32U] = { (uint8_t)9U }; @@ -150,8 +152,12 @@ montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) memcpy(out, p0, (uint32_t)10U * sizeof(uint64_t)); } -static void -fsquare_times(uint64_t *o, uint64_t *inp, FStar_UInt128_uint128 *tmp, uint32_t n) +void +Hacl_Curve25519_51_fsquare_times( + uint64_t *o, + uint64_t *inp, + FStar_UInt128_uint128 *tmp, + uint32_t n) { Hacl_Impl_Curve25519_Field51_fsqr(o, inp, tmp); for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { @@ -159,43 +165,43 @@ fsquare_times(uint64_t *o, uint64_t *inp, FStar_UInt128_uint128 *tmp, uint32_t n } } -static void -finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp) +void +Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp) { uint64_t t1[20U] = { 0U }; uint64_t *a1 = t1; uint64_t *b1 = t1 + (uint32_t)5U; uint64_t *t010 = t1 + (uint32_t)15U; FStar_UInt128_uint128 *tmp10 = tmp; - fsquare_times(a1, i, tmp10, (uint32_t)1U); - fsquare_times(t010, a1, tmp10, (uint32_t)2U); + Hacl_Curve25519_51_fsquare_times(a1, i, tmp10, (uint32_t)1U); + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)2U); Hacl_Impl_Curve25519_Field51_fmul(b1, t010, i, tmp); Hacl_Impl_Curve25519_Field51_fmul(a1, b1, a1, tmp); - fsquare_times(t010, a1, tmp10, (uint32_t)1U); + Hacl_Curve25519_51_fsquare_times(t010, a1, tmp10, (uint32_t)1U); Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); - fsquare_times(t010, b1, tmp10, (uint32_t)5U); + Hacl_Curve25519_51_fsquare_times(t010, b1, tmp10, (uint32_t)5U); Hacl_Impl_Curve25519_Field51_fmul(b1, t010, b1, tmp); uint64_t *b10 = t1 + (uint32_t)5U; uint64_t *c10 = t1 + (uint32_t)10U; uint64_t *t011 = t1 + (uint32_t)15U; FStar_UInt128_uint128 *tmp11 = tmp; - fsquare_times(t011, b10, tmp11, (uint32_t)10U); + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)10U); 
Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); - fsquare_times(t011, c10, tmp11, (uint32_t)20U); + Hacl_Curve25519_51_fsquare_times(t011, c10, tmp11, (uint32_t)20U); Hacl_Impl_Curve25519_Field51_fmul(t011, t011, c10, tmp); - fsquare_times(t011, t011, tmp11, (uint32_t)10U); + Hacl_Curve25519_51_fsquare_times(t011, t011, tmp11, (uint32_t)10U); Hacl_Impl_Curve25519_Field51_fmul(b10, t011, b10, tmp); - fsquare_times(t011, b10, tmp11, (uint32_t)50U); + Hacl_Curve25519_51_fsquare_times(t011, b10, tmp11, (uint32_t)50U); Hacl_Impl_Curve25519_Field51_fmul(c10, t011, b10, tmp); uint64_t *b11 = t1 + (uint32_t)5U; uint64_t *c1 = t1 + (uint32_t)10U; uint64_t *t01 = t1 + (uint32_t)15U; FStar_UInt128_uint128 *tmp1 = tmp; - fsquare_times(t01, c1, tmp1, (uint32_t)100U); + Hacl_Curve25519_51_fsquare_times(t01, c1, tmp1, (uint32_t)100U); Hacl_Impl_Curve25519_Field51_fmul(t01, t01, c1, tmp); - fsquare_times(t01, t01, tmp1, (uint32_t)50U); + Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)50U); Hacl_Impl_Curve25519_Field51_fmul(t01, t01, b11, tmp); - fsquare_times(t01, t01, tmp1, (uint32_t)5U); + Hacl_Curve25519_51_fsquare_times(t01, t01, tmp1, (uint32_t)5U); uint64_t *a = t1; uint64_t *t0 = t1 + (uint32_t)15U; Hacl_Impl_Curve25519_Field51_fmul(o, t0, a, tmp); @@ -211,12 +217,14 @@ encode_point(uint8_t *o, uint64_t *i) FStar_UInt128_uint128 tmp_w[10U]; for (uint32_t _i = 0U; _i < (uint32_t)10U; ++_i) tmp_w[_i] = FStar_UInt128_uint64_to_uint128((uint64_t)0U); - finv(tmp, z, tmp_w); + Hacl_Curve25519_51_finv(tmp, z, tmp_w); Hacl_Impl_Curve25519_Field51_fmul(tmp, tmp, x, tmp_w); Hacl_Impl_Curve25519_Field51_store_felem(u64s, tmp); - for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)4U; i0++) { - store64_le(o + i0 * (uint32_t)8U, u64s[i0]); - } + KRML_MAYBE_FOR4(i0, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); } void @@ -224,14 +232,16 @@ Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) { uint64_t 
init[10U] = { 0U }; uint64_t tmp[4U] = { 0U }; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) { - uint64_t *os = tmp; - uint8_t *bj = pub + i * (uint32_t)8U; - uint64_t u = load64_le(bj); - uint64_t r = u; - uint64_t x = r; - os[i] = x; - } + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint64_t *os = tmp; + uint8_t *bj = pub + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); uint64_t tmp3 = tmp[3U]; tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; uint64_t *x = init; diff --git a/lib/freebl/verified/Hacl_Curve25519_51.h b/lib/freebl/verified/Hacl_Curve25519_51.h index dade9637b..2a4494a7a 100644 --- a/lib/freebl/verified/Hacl_Curve25519_51.h +++ b/lib/freebl/verified/Hacl_Curve25519_51.h @@ -28,12 +28,12 @@ extern "C" { #endif -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Kremlib.h" +#include "Hacl_Krmllib.h" #include "Hacl_Bignum25519_51.h" void Hacl_Curve25519_51_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub); diff --git a/lib/freebl/verified/Hacl_Curve25519_64.c b/lib/freebl/verified/Hacl_Curve25519_64.c new file mode 100644 index 000000000..40144d48d --- /dev/null +++ b/lib/freebl/verified/Hacl_Curve25519_64.c @@ -0,0 +1,400 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "Hacl_Curve25519_64.h" + +#include "internal/Vale.h" +#include "internal/Hacl_Krmllib.h" +#include "config.h" +#include "curve25519-inline.h" +static inline void +add_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + add_scalar(out, f1, f2); +#else + uint64_t uu____0 = add_scalar_e(out, f1, f2); +#endif +} + +static inline void +fadd0(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fadd(out, f1, f2); +#else + uint64_t uu____0 = fadd_e(out, f1, f2); +#endif +} + +static inline void +fsub0(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fsub(out, f1, f2); +#else + uint64_t uu____0 = fsub_e(out, f1, f2); +#endif +} + +static inline void +fmul0(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fmul(out, f1, f2, tmp); +#else + uint64_t uu____0 = fmul_e(tmp, f1, out, f2); +#endif +} + +static inline void +fmul20(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fmul2(out, f1, f2, tmp); +#else + uint64_t uu____0 = fmul2_e(tmp, f1, out, f2); +#endif +} + +static inline void +fmul_scalar0(uint64_t *out, uint64_t *f1, uint64_t f2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fmul_scalar(out, f1, f2); +#else + uint64_t uu____0 = fmul_scalar_e(out, f1, f2); +#endif 
+} + +static inline void +fsqr0(uint64_t *out, uint64_t *f1, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fsqr(out, f1, tmp); +#else + uint64_t uu____0 = fsqr_e(tmp, f1, out); +#endif +} + +static inline void +fsqr20(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + fsqr2(out, f, tmp); +#else + uint64_t uu____0 = fsqr2_e(tmp, f, out); +#endif +} + +static inline void +cswap20(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ +#if HACL_CAN_COMPILE_INLINE_ASM + cswap2(bit, p1, p2); +#else + uint64_t uu____0 = cswap2_e(bit, p1, p2); +#endif +} + +static const uint8_t g25519[32U] = { (uint8_t)9U }; + +static void +point_add_and_double(uint64_t *q, uint64_t *p01_tmp1, uint64_t *tmp2) +{ + uint64_t *nq = p01_tmp1; + uint64_t *nq_p1 = p01_tmp1 + (uint32_t)8U; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U; + uint64_t *x1 = q; + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)4U; + uint64_t *z3 = nq_p1 + (uint32_t)4U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 + (uint32_t)4U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)8U; + fadd0(a, x2, z2); + fsub0(b, x2, z2); + uint64_t *x3 = nq_p1; + uint64_t *z31 = nq_p1 + (uint32_t)4U; + uint64_t *d0 = dc; + uint64_t *c0 = dc + (uint32_t)4U; + fadd0(c0, x3, z31); + fsub0(d0, x3, z31); + fmul20(dc, dc, ab, tmp2); + fadd0(x3, d0, c0); + fsub0(z31, d0, c0); + uint64_t *a1 = tmp1; + uint64_t *b1 = tmp1 + (uint32_t)4U; + uint64_t *d = tmp1 + (uint32_t)8U; + uint64_t *c = tmp1 + (uint32_t)12U; + uint64_t *ab1 = tmp1; + uint64_t *dc1 = tmp1 + (uint32_t)8U; + fsqr20(dc1, ab1, tmp2); + fsqr20(nq_p1, nq_p1, tmp2); + a1[0U] = c[0U]; + a1[1U] = c[1U]; + a1[2U] = c[2U]; + a1[3U] = c[3U]; + fsub0(c, d, c); + fmul_scalar0(b1, c, (uint64_t)121665U); + fadd0(b1, b1, d); + fmul20(nq, dc1, ab1, tmp2); + fmul0(z3, z3, x1, tmp2); +} + +static void +point_double(uint64_t *nq, uint64_t *tmp1, uint64_t *tmp2) +{ + uint64_t *x2 = nq; + uint64_t *z2 = nq + (uint32_t)4U; + uint64_t *a = tmp1; + uint64_t *b = tmp1 
+ (uint32_t)4U; + uint64_t *d = tmp1 + (uint32_t)8U; + uint64_t *c = tmp1 + (uint32_t)12U; + uint64_t *ab = tmp1; + uint64_t *dc = tmp1 + (uint32_t)8U; + fadd0(a, x2, z2); + fsub0(b, x2, z2); + fsqr20(dc, ab, tmp2); + a[0U] = c[0U]; + a[1U] = c[1U]; + a[2U] = c[2U]; + a[3U] = c[3U]; + fsub0(c, d, c); + fmul_scalar0(b, c, (uint64_t)121665U); + fadd0(b, b, d); + fmul20(nq, dc, ab, tmp2); +} + +static void +montgomery_ladder(uint64_t *out, uint8_t *key, uint64_t *init) +{ + uint64_t tmp2[16U] = { 0U }; + uint64_t p01_tmp1_swap[33U] = { 0U }; + uint64_t *p0 = p01_tmp1_swap; + uint64_t *p01 = p01_tmp1_swap; + uint64_t *p03 = p01; + uint64_t *p11 = p01 + (uint32_t)8U; + memcpy(p11, init, (uint32_t)8U * sizeof(uint64_t)); + uint64_t *x0 = p03; + uint64_t *z0 = p03 + (uint32_t)4U; + x0[0U] = (uint64_t)1U; + x0[1U] = (uint64_t)0U; + x0[2U] = (uint64_t)0U; + x0[3U] = (uint64_t)0U; + z0[0U] = (uint64_t)0U; + z0[1U] = (uint64_t)0U; + z0[2U] = (uint64_t)0U; + z0[3U] = (uint64_t)0U; + uint64_t *p01_tmp1 = p01_tmp1_swap; + uint64_t *p01_tmp11 = p01_tmp1_swap; + uint64_t *nq1 = p01_tmp1_swap; + uint64_t *nq_p11 = p01_tmp1_swap + (uint32_t)8U; + uint64_t *swap = p01_tmp1_swap + (uint32_t)32U; + cswap20((uint64_t)1U, nq1, nq_p11); + point_add_and_double(init, p01_tmp11, tmp2); + swap[0U] = (uint64_t)1U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)251U; i++) { + uint64_t *p01_tmp12 = p01_tmp1_swap; + uint64_t *swap1 = p01_tmp1_swap + (uint32_t)32U; + uint64_t *nq2 = p01_tmp12; + uint64_t *nq_p12 = p01_tmp12 + (uint32_t)8U; + uint64_t + bit = + (uint64_t)(key[((uint32_t)253U - i) / (uint32_t)8U] >> ((uint32_t)253U - i) % (uint32_t)8U & (uint8_t)1U); + uint64_t sw = swap1[0U] ^ bit; + cswap20(sw, nq2, nq_p12); + point_add_and_double(init, p01_tmp12, tmp2); + swap1[0U] = bit; + } + uint64_t sw = swap[0U]; + cswap20(sw, nq1, nq_p11); + uint64_t *nq10 = p01_tmp1; + uint64_t *tmp1 = p01_tmp1 + (uint32_t)16U; + point_double(nq10, tmp1, tmp2); + point_double(nq10, tmp1, tmp2); + 
point_double(nq10, tmp1, tmp2); + memcpy(out, p0, (uint32_t)8U * sizeof(uint64_t)); +} + +static void +fsquare_times(uint64_t *o, uint64_t *inp, uint64_t *tmp, uint32_t n) +{ + fsqr0(o, inp, tmp); + for (uint32_t i = (uint32_t)0U; i < n - (uint32_t)1U; i++) { + fsqr0(o, o, tmp); + } +} + +static void +finv(uint64_t *o, uint64_t *i, uint64_t *tmp) +{ + uint64_t t1[16U] = { 0U }; + uint64_t *a1 = t1; + uint64_t *b1 = t1 + (uint32_t)4U; + uint64_t *t010 = t1 + (uint32_t)12U; + uint64_t *tmp10 = tmp; + fsquare_times(a1, i, tmp10, (uint32_t)1U); + fsquare_times(t010, a1, tmp10, (uint32_t)2U); + fmul0(b1, t010, i, tmp); + fmul0(a1, b1, a1, tmp); + fsquare_times(t010, a1, tmp10, (uint32_t)1U); + fmul0(b1, t010, b1, tmp); + fsquare_times(t010, b1, tmp10, (uint32_t)5U); + fmul0(b1, t010, b1, tmp); + uint64_t *b10 = t1 + (uint32_t)4U; + uint64_t *c10 = t1 + (uint32_t)8U; + uint64_t *t011 = t1 + (uint32_t)12U; + uint64_t *tmp11 = tmp; + fsquare_times(t011, b10, tmp11, (uint32_t)10U); + fmul0(c10, t011, b10, tmp); + fsquare_times(t011, c10, tmp11, (uint32_t)20U); + fmul0(t011, t011, c10, tmp); + fsquare_times(t011, t011, tmp11, (uint32_t)10U); + fmul0(b10, t011, b10, tmp); + fsquare_times(t011, b10, tmp11, (uint32_t)50U); + fmul0(c10, t011, b10, tmp); + uint64_t *b11 = t1 + (uint32_t)4U; + uint64_t *c1 = t1 + (uint32_t)8U; + uint64_t *t01 = t1 + (uint32_t)12U; + uint64_t *tmp1 = tmp; + fsquare_times(t01, c1, tmp1, (uint32_t)100U); + fmul0(t01, t01, c1, tmp); + fsquare_times(t01, t01, tmp1, (uint32_t)50U); + fmul0(t01, t01, b11, tmp); + fsquare_times(t01, t01, tmp1, (uint32_t)5U); + uint64_t *a = t1; + uint64_t *t0 = t1 + (uint32_t)12U; + fmul0(o, t0, a, tmp); +} + +static void +store_felem(uint64_t *b, uint64_t *f) +{ + uint64_t f30 = f[3U]; + uint64_t top_bit0 = f30 >> (uint32_t)63U; + f[3U] = f30 & (uint64_t)0x7fffffffffffffffU; + add_scalar0(f, f, (uint64_t)19U * top_bit0); + uint64_t f31 = f[3U]; + uint64_t top_bit = f31 >> (uint32_t)63U; + f[3U] = f31 & 
(uint64_t)0x7fffffffffffffffU; + add_scalar0(f, f, (uint64_t)19U * top_bit); + uint64_t f0 = f[0U]; + uint64_t f1 = f[1U]; + uint64_t f2 = f[2U]; + uint64_t f3 = f[3U]; + uint64_t m0 = FStar_UInt64_gte_mask(f0, (uint64_t)0xffffffffffffffedU); + uint64_t m1 = FStar_UInt64_eq_mask(f1, (uint64_t)0xffffffffffffffffU); + uint64_t m2 = FStar_UInt64_eq_mask(f2, (uint64_t)0xffffffffffffffffU); + uint64_t m3 = FStar_UInt64_eq_mask(f3, (uint64_t)0x7fffffffffffffffU); + uint64_t mask = ((m0 & m1) & m2) & m3; + uint64_t f0_ = f0 - (mask & (uint64_t)0xffffffffffffffedU); + uint64_t f1_ = f1 - (mask & (uint64_t)0xffffffffffffffffU); + uint64_t f2_ = f2 - (mask & (uint64_t)0xffffffffffffffffU); + uint64_t f3_ = f3 - (mask & (uint64_t)0x7fffffffffffffffU); + uint64_t o0 = f0_; + uint64_t o1 = f1_; + uint64_t o2 = f2_; + uint64_t o3 = f3_; + b[0U] = o0; + b[1U] = o1; + b[2U] = o2; + b[3U] = o3; +} + +static void +encode_point(uint8_t *o, uint64_t *i) +{ + uint64_t *x = i; + uint64_t *z = i + (uint32_t)4U; + uint64_t tmp[4U] = { 0U }; + uint64_t u64s[4U] = { 0U }; + uint64_t tmp_w[16U] = { 0U }; + finv(tmp, z, tmp_w); + fmul0(tmp, tmp, x, tmp_w); + store_felem(u64s, tmp); + KRML_MAYBE_FOR4(i0, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + store64_le(o + i0 * (uint32_t)8U, u64s[i0]);); +} + +void +Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint64_t init[8U] = { 0U }; + uint64_t tmp[4U] = { 0U }; + KRML_MAYBE_FOR4(i, + (uint32_t)0U, + (uint32_t)4U, + (uint32_t)1U, + uint64_t *os = tmp; + uint8_t *bj = pub + i * (uint32_t)8U; + uint64_t u = load64_le(bj); + uint64_t r = u; + uint64_t x = r; + os[i] = x;); + uint64_t tmp3 = tmp[3U]; + tmp[3U] = tmp3 & (uint64_t)0x7fffffffffffffffU; + uint64_t *x = init; + uint64_t *z = init + (uint32_t)4U; + z[0U] = (uint64_t)1U; + z[1U] = (uint64_t)0U; + z[2U] = (uint64_t)0U; + z[3U] = (uint64_t)0U; + x[0U] = tmp[0U]; + x[1U] = tmp[1U]; + x[2U] = tmp[2U]; + x[3U] = tmp[3U]; + montgomery_ladder(init, priv, init); + 
encode_point(out, init); +} + +void +Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv) +{ + uint8_t basepoint[32U] = { 0U }; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t *os = basepoint; + uint8_t x = g25519[i]; + os[i] = x; + } + Hacl_Curve25519_64_scalarmult(pub, priv, basepoint); +} + +bool +Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub) +{ + uint8_t zeros[32U] = { 0U }; + Hacl_Curve25519_64_scalarmult(out, priv, pub); + uint8_t res = (uint8_t)255U; + for (uint32_t i = (uint32_t)0U; i < (uint32_t)32U; i++) { + uint8_t uu____0 = FStar_UInt8_eq_mask(out[i], zeros[i]); + res = uu____0 & res; + } + uint8_t z = res; + bool r = z == (uint8_t)255U; + return !r; +} diff --git a/lib/freebl/verified/Hacl_Curve25519_64.h b/lib/freebl/verified/Hacl_Curve25519_64.h new file mode 100644 index 000000000..e9dec2b9a --- /dev/null +++ b/lib/freebl/verified/Hacl_Curve25519_64.h @@ -0,0 +1,49 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __Hacl_Curve25519_64_H +#define __Hacl_Curve25519_64_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "Hacl_Krmllib.h" + +void Hacl_Curve25519_64_scalarmult(uint8_t *out, uint8_t *priv, uint8_t *pub); + +void Hacl_Curve25519_64_secret_to_public(uint8_t *pub, uint8_t *priv); + +bool Hacl_Curve25519_64_ecdh(uint8_t *out, uint8_t *priv, uint8_t *pub); + +#if defined(__cplusplus) +} +#endif + +#define __Hacl_Curve25519_64_H_DEFINED +#endif diff --git a/lib/freebl/verified/Hacl_Kremlib.h b/lib/freebl/verified/Hacl_Krmllib.h index 1b47ca3b1..453492b5c 100644 --- a/lib/freebl/verified/Hacl_Kremlib.h +++ b/lib/freebl/verified/Hacl_Krmllib.h @@ -21,17 +21,17 @@ * SOFTWARE. 
*/ -#ifndef __Hacl_Kremlib_H -#define __Hacl_Kremlib_H +#ifndef __Hacl_Krmllib_H +#define __Hacl_Krmllib_H #if defined(__cplusplus) extern "C" { #endif -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" static inline uint32_t FStar_UInt32_eq_mask(uint32_t a, uint32_t b); @@ -43,10 +43,24 @@ static inline uint64_t FStar_UInt64_eq_mask(uint64_t a, uint64_t b); static inline uint64_t FStar_UInt64_gte_mask(uint64_t a, uint64_t b); +static inline uint16_t FStar_UInt16_eq_mask(uint16_t a, uint16_t b); + static inline FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); static inline FStar_UInt128_uint128 +FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_sub_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); + +static inline FStar_UInt128_uint128 +FStar_UInt128_shift_left(FStar_UInt128_uint128 a, uint32_t s); + +static inline FStar_UInt128_uint128 FStar_UInt128_shift_right(FStar_UInt128_uint128 a, uint32_t s); static inline FStar_UInt128_uint128 FStar_UInt128_uint64_to_uint128(uint64_t a); @@ -55,9 +69,13 @@ static inline uint64_t FStar_UInt128_uint128_to_uint64(FStar_UInt128_uint128 a); static inline FStar_UInt128_uint128 FStar_UInt128_mul_wide(uint64_t x, uint64_t y); +static inline void store128_be(uint8_t *x0, FStar_UInt128_uint128 x1); + +static inline FStar_UInt128_uint128 load128_be(uint8_t *x0); + #if defined(__cplusplus) } #endif -#define __Hacl_Kremlib_H_DEFINED +#define __Hacl_Krmllib_H_DEFINED #endif diff --git a/lib/freebl/verified/Hacl_Poly1305_128.c b/lib/freebl/verified/Hacl_Poly1305_128.c index 963068d42..ae8570c75 100644 --- 
a/lib/freebl/verified/Hacl_Poly1305_128.c +++ b/lib/freebl/verified/Hacl_Poly1305_128.c @@ -21,14 +21,13 @@ * SOFTWARE. */ -#include "Hacl_Poly1305_128.h" +#include "internal/Hacl_Poly1305_128.h" void Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b) { - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(b); Lib_IntVector_Intrinsics_vec128 b2 = Lib_IntVector_Intrinsics_vec128_load64_le(b + (uint32_t)16U); @@ -347,8 +346,6 @@ Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( out[4U] = o4; } -uint32_t Hacl_Poly1305_128_blocklen = (uint32_t)16U; - void Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key) { @@ -573,9 +570,8 @@ Hacl_Poly1305_128_poly1305_update1(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t { Lib_IntVector_Intrinsics_vec128 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec128 *acc = ctx; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; uint64_t u0 = load64_le(text); uint64_t lo = u0; uint64_t u = load64_le(text + (uint32_t)8U); @@ -800,9 +796,8 @@ Hacl_Poly1305_128_poly1305_update( uint32_t nb = len1 / bs; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = text1 + i * bs; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; Lib_IntVector_Intrinsics_vec128 b1 = Lib_IntVector_Intrinsics_vec128_load64_le(block); Lib_IntVector_Intrinsics_vec128 b2 = 
Lib_IntVector_Intrinsics_vec128_load64_le(block + (uint32_t)16U); @@ -1024,9 +1019,8 @@ Hacl_Poly1305_128_poly1305_update( uint32_t rem = len1 % (uint32_t)16U; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = t1 + i * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; uint64_t u0 = load64_le(block); uint64_t lo = u0; uint64_t u = load64_le(block + (uint32_t)8U); @@ -1232,9 +1226,8 @@ Hacl_Poly1305_128_poly1305_update( } if (rem > (uint32_t)0U) { uint8_t *last = t1 + nb * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec128 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 e[5U] KRML_POST_ALIGN(16) = { 0U }; uint8_t tmp[16U] = { 0U }; memcpy(tmp, last, rem * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); @@ -1615,9 +1608,8 @@ Hacl_Poly1305_128_poly1305_finish( void Hacl_Poly1305_128_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) { - Lib_IntVector_Intrinsics_vec128 ctx[25U]; - for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec128_zero; + KRML_PRE_ALIGN(16) + Lib_IntVector_Intrinsics_vec128 ctx[25U] KRML_POST_ALIGN(16) = { 0U }; Hacl_Poly1305_128_poly1305_init(ctx, key); Hacl_Poly1305_128_poly1305_update(ctx, len, text); Hacl_Poly1305_128_poly1305_finish(tag, key, ctx); diff --git a/lib/freebl/verified/Hacl_Poly1305_128.h b/lib/freebl/verified/Hacl_Poly1305_128.h index 49171ddcc..03069fdb4 100644 --- a/lib/freebl/verified/Hacl_Poly1305_128.h +++ b/lib/freebl/verified/Hacl_Poly1305_128.h @@ -28,24 +28,13 @@ extern "C" { #endif -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> - -#include 
"Hacl_Kremlib.h" - -void -Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b); - -void -Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( - Lib_IntVector_Intrinsics_vec128 *out, - Lib_IntVector_Intrinsics_vec128 *p); - -extern uint32_t Hacl_Poly1305_128_blocklen; +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" +#include "Hacl_Krmllib.h" +#include "libintvector.h" typedef Lib_IntVector_Intrinsics_vec128 *Hacl_Poly1305_128_poly1305_ctx; void Hacl_Poly1305_128_poly1305_init(Lib_IntVector_Intrinsics_vec128 *ctx, uint8_t *key); diff --git a/lib/freebl/verified/Hacl_Poly1305_256.c b/lib/freebl/verified/Hacl_Poly1305_256.c index 6f5bffd97..c5a2a5908 100644 --- a/lib/freebl/verified/Hacl_Poly1305_256.c +++ b/lib/freebl/verified/Hacl_Poly1305_256.c @@ -21,14 +21,13 @@ * SOFTWARE. */ -#include "Hacl_Poly1305_256.h" +#include "internal/Hacl_Poly1305_256.h" void Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b) { - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(b); Lib_IntVector_Intrinsics_vec256 hi = Lib_IntVector_Intrinsics_vec256_load64_le(b + (uint32_t)32U); @@ -657,8 +656,6 @@ Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( out[4U] = o4; } -uint32_t Hacl_Poly1305_256_blocklen = (uint32_t)16U; - void Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key) { @@ -1042,9 +1039,8 @@ Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t { Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; Lib_IntVector_Intrinsics_vec256 *acc = ctx; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; 
++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; uint64_t u0 = load64_le(text); uint64_t lo = u0; uint64_t u = load64_le(text + (uint32_t)8U); @@ -1269,9 +1265,8 @@ Hacl_Poly1305_256_poly1305_update( uint32_t nb = len1 / bs; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = text1 + i * bs; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load64_le(block); Lib_IntVector_Intrinsics_vec256 hi = Lib_IntVector_Intrinsics_vec256_load64_le(block + (uint32_t)32U); @@ -1495,9 +1490,8 @@ Hacl_Poly1305_256_poly1305_update( uint32_t rem = len1 % (uint32_t)16U; for (uint32_t i = (uint32_t)0U; i < nb; i++) { uint8_t *block = t1 + i * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; uint64_t u0 = load64_le(block); uint64_t lo = u0; uint64_t u = load64_le(block + (uint32_t)8U); @@ -1703,9 +1697,8 @@ Hacl_Poly1305_256_poly1305_update( } if (rem > (uint32_t)0U) { uint8_t *last = t1 + nb * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 e[5U] KRML_POST_ALIGN(32) = { 0U }; uint8_t tmp[16U] = { 0U }; memcpy(tmp, last, rem * sizeof(uint8_t)); uint64_t u0 = load64_le(tmp); @@ -2086,9 +2079,8 @@ Hacl_Poly1305_256_poly1305_finish( void Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) { - Lib_IntVector_Intrinsics_vec256 ctx[25U]; - for (uint32_t _i 
= 0U; _i < (uint32_t)25U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; + KRML_PRE_ALIGN(32) + Lib_IntVector_Intrinsics_vec256 ctx[25U] KRML_POST_ALIGN(32) = { 0U }; Hacl_Poly1305_256_poly1305_init(ctx, key); Hacl_Poly1305_256_poly1305_update(ctx, len, text); Hacl_Poly1305_256_poly1305_finish(tag, key, ctx); diff --git a/lib/freebl/verified/Hacl_Poly1305_256.h b/lib/freebl/verified/Hacl_Poly1305_256.h index 62a2ca002..d9bf5fd83 100644 --- a/lib/freebl/verified/Hacl_Poly1305_256.h +++ b/lib/freebl/verified/Hacl_Poly1305_256.h @@ -28,24 +28,13 @@ extern "C" { #endif -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> - -#include "Hacl_Kremlib.h" - -void -Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b); - -void -Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( - Lib_IntVector_Intrinsics_vec256 *out, - Lib_IntVector_Intrinsics_vec256 *p); - -extern uint32_t Hacl_Poly1305_256_blocklen; +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" +#include "Hacl_Krmllib.h" +#include "libintvector.h" typedef Lib_IntVector_Intrinsics_vec256 *Hacl_Poly1305_256_poly1305_ctx; void Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key); diff --git a/lib/freebl/verified/Hacl_Poly1305_32.c b/lib/freebl/verified/Hacl_Poly1305_32.c index 25ee87c1f..8de2eca7f 100644 --- a/lib/freebl/verified/Hacl_Poly1305_32.c +++ b/lib/freebl/verified/Hacl_Poly1305_32.c @@ -23,7 +23,7 @@ #include "Hacl_Poly1305_32.h" -uint32_t Hacl_Poly1305_32_blocklen = (uint32_t)16U; +#include "internal/Hacl_Krmllib.h" void Hacl_Poly1305_32_poly1305_init(uint64_t *ctx, uint8_t *key) diff --git a/lib/freebl/verified/Hacl_Poly1305_32.h b/lib/freebl/verified/Hacl_Poly1305_32.h index c552d6f42..84a2f606b 100644 --- a/lib/freebl/verified/Hacl_Poly1305_32.h +++ 
b/lib/freebl/verified/Hacl_Poly1305_32.h @@ -28,14 +28,12 @@ extern "C" { #endif -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" #include <string.h> -#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" -#include "Hacl_Kremlib.h" - -extern uint32_t Hacl_Poly1305_32_blocklen; +#include "Hacl_Krmllib.h" typedef uint64_t *Hacl_Poly1305_32_poly1305_ctx; diff --git a/lib/freebl/verified/curve25519-inline.h b/lib/freebl/verified/curve25519-inline.h new file mode 100644 index 000000000..690e75a1b --- /dev/null +++ b/lib/freebl/verified/curve25519-inline.h @@ -0,0 +1,942 @@ +#ifdef __GNUC__ +#if defined(__x86_64__) || defined(_M_X64) +#pragma once +#include <inttypes.h> + +// Computes the addition of four-element f1 with value in f2 +// and returns the carry (if any) +static inline void +add_scalar(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + __asm__ volatile( + // Clear registers to propagate the carry bit + " xor %%r8d, %%r8d;" + " xor %%r9d, %%r9d;" + " xor %%r10d, %%r10d;" + " xor %%r11d, %%r11d;" + " xor %%eax, %%eax;" + + // Begin addition chain + " addq 0(%2), %0;" + " movq %0, 0(%1);" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%1);" + " adcxq 16(%2), %%r9;" + " movq %%r9, 16(%1);" + " adcxq 24(%2), %%r10;" + " movq %%r10, 24(%1);" + + // Return the carry bit in a register + " adcx %%r11, %%rax;" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes the field addition of two field elements +static inline void +fadd(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + __asm__ volatile( + // Compute the raw addition of f1 + f2 + " movq 0(%0), %%r8;" + " addq 0(%2), %%r8;" + " movq 8(%0), %%r9;" + " adcxq 8(%2), %%r9;" + " movq 16(%0), %%r10;" + " adcxq 16(%2), %%r10;" + " movq 24(%0), %%r11;" + " adcxq 24(%2), %%r11;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov 
$0, %%rax;" + " mov $38, %0;" + " cmovc %0, %%rax;" + + // Step 2: Add carry*38 to the original sum + " xor %%ecx, %%ecx;" + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %0, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2) + : "r"(out), "r"(f1) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes the field substraction of two field elements +static inline void +fsub(uint64_t *out, uint64_t *f1, uint64_t *f2) +{ + __asm__ volatile( + // Compute the raw substraction of f1-f2 + " movq 0(%1), %%r8;" + " subq 0(%2), %%r8;" + " movq 8(%1), %%r9;" + " sbbq 8(%2), %%r9;" + " movq 16(%1), %%r10;" + " sbbq 16(%2), %%r10;" + " movq 24(%1), %%r11;" + " sbbq 24(%2), %%r11;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " mov $0, %%rax;" + " mov $38, %%rcx;" + " cmovc %%rcx, %%rax;" + + // Step 2: Substract carry*38 from the original difference + " sub %%rax, %%r8;" + " sbb $0, %%r9;" + " sbb $0, %%r10;" + " sbb $0, %%r11;" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rcx, %%rax;" + " sub %%rax, %%r8;" + + // Store the result + " movq %%r8, 0(%0);" + " movq %%r9, 8(%0);" + " movq %%r10, 16(%0);" + " movq %%r11, 24(%0);" + : + : "r"(out), "r"(f1), "r"(f2) + : "%rax", "%rcx", "%r8", "%r9", "%r10", "%r11", "memory", "cc"); +} + +// Computes a field multiplication: out <- f1 * f2 +// Uses the 8-element buffer tmp for intermediate results +static inline void +fmul(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ + __asm__ volatile( + + /////// Compute the raw multiplication: tmp <- src1 * src2 ////// + + // Compute src1[0] * src2 + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " 
xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + // Line up pointers + " mov %2, %0;" + " mov %3, %2;" + + /////// Wrap the result back into the field ////// 
+ + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc"); +} + +// Computes two field multiplications: +// out[0] <- f1[0] * f2[0] +// out[1] <- f1[1] * f2[1] +// Uses the 16-element buffer tmp for intermediate results: +static inline void +fmul2(uint64_t *out, uint64_t *f1, uint64_t *f2, uint64_t *tmp) +{ + __asm__ volatile( + + /////// Compute the raw multiplication tmp[0] <- f1[0] * f2[0] ////// + + // Compute src1[0] * src2 + " movq 0(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 0(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 8(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 8(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 8(%2), %%r8;" + " movq %%r8, 8(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 16(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox 
%%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 16(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 16(%2), %%r8;" + " movq %%r8, 16(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 24(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 24(%0), %%rdx;" + " mulxq 0(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 24(%2), %%r8;" + " movq %%r8, 24(%2);" + " mulxq 8(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 32(%2);" + " mulxq 16(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 40(%2);" + " mov $0, %%r8;" + " mulxq 24(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 48(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 56(%2);" + + /////// Compute the raw multiplication tmp[1] <- f1[1] * f2[1] ////// + + // Compute src1[0] * src2 + " movq 32(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " movq %%r8, 64(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " movq %%r10, 72(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + + // Compute src1[1] * src2 + " movq 40(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 72(%2), %%r8;" + " movq %%r8, 72(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq 
%%r10, 80(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[2] * src2 + " movq 48(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 80(%2), %%r8;" + " movq %%r8, 80(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 88(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + + // Compute src1[3] * src2 + " movq 56(%0), %%rdx;" + " mulxq 32(%1), %%r8, %%r9;" + " xor %%r10d, %%r10d;" + " adcxq 88(%2), %%r8;" + " movq %%r8, 88(%2);" + " mulxq 40(%1), %%r10, %%r11;" + " adox %%r9, %%r10;" + " adcx %%rbx, %%r10;" + " movq %%r10, 96(%2);" + " mulxq 48(%1), %%rbx, %%r13;" + " adox %%r11, %%rbx;" + " adcx %%r14, %%rbx;" + " movq %%rbx, 104(%2);" + " mov $0, %%r8;" + " mulxq 56(%1), %%r14, %%rdx;" + " adox %%r13, %%r14;" + " adcx %%rax, %%r14;" + " movq %%r14, 112(%2);" + " mov $0, %%rax;" + " adox %%rdx, %%rax;" + " adcx %%r8, %%rax;" + " movq %%rax, 120(%2);" + + // Line up pointers + " mov %2, %0;" + " mov %3, %2;" + + /////// Wrap the results back into the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add 
%%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 8(%2);" + " adcx %1, %%r10;" + " movq %%r10, 16(%2);" + " adcx %1, %%r11;" + " movq %%r11, 24(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%2);" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %k1, %k1;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %1, %%rax;" + " adox %1, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %1, %%r9;" + " movq %%r9, 40(%2);" + " adcx %1, %%r10;" + " movq %%r10, 48(%2);" + " adcx %1, %%r11;" + " movq %%r11, 56(%2);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%2);" + : "+&r"(f1), "+&r"(f2), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "memory", "cc"); +} + +// Computes the field multiplication of four-element f1 with value in f2 +// Requires f2 to be smaller than 2^17 +static inline void +fmul_scalar(uint64_t *out, uint64_t *f1, uint64_t f2) +{ + register uint64_t f2_r __asm__("rdx") = f2; + + __asm__ volatile( + // Compute the raw multiplication of f1*f2 + " mulxq 0(%2), %%r8, %%rcx;" // f1[0]*f2 + " mulxq 8(%2), %%r9, %%rbx;" // f1[1]*f2 + " add %%rcx, %%r9;" + " mov $0, %%rcx;" + " mulxq 16(%2), %%r10, %%r13;" // f1[2]*f2 + " adcx %%rbx, %%r10;" + " mulxq 24(%2), %%r11, %%rax;" // f1[3]*f2 + " adcx %%r13, %%r11;" + " adcx %%rcx, %%rax;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute carry*38 + " 
mov $38, %%rdx;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f2_r) + : "r"(out), "r"(f1) + : "%rax", "%rbx", "%rcx", "%r8", "%r9", "%r10", "%r11", "%r13", "memory", "cc"); +} + +// Computes p1 <- bit ? p2 : p1 in constant time +static inline void +cswap2(uint64_t bit, uint64_t *p1, uint64_t *p2) +{ + __asm__ volatile( + // Transfer bit into CF flag + " add $18446744073709551615, %0;" + + // cswap p1[0], p2[0] + " movq 0(%1), %%r8;" + " movq 0(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 0(%1);" + " movq %%r9, 0(%2);" + + // cswap p1[1], p2[1] + " movq 8(%1), %%r8;" + " movq 8(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 8(%1);" + " movq %%r9, 8(%2);" + + // cswap p1[2], p2[2] + " movq 16(%1), %%r8;" + " movq 16(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 16(%1);" + " movq %%r9, 16(%2);" + + // cswap p1[3], p2[3] + " movq 24(%1), %%r8;" + " movq 24(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 24(%1);" + " movq %%r9, 24(%2);" + + // cswap p1[4], p2[4] + " movq 32(%1), %%r8;" + " movq 32(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 32(%1);" + " movq %%r9, 32(%2);" + + // cswap p1[5], p2[5] + " movq 40(%1), %%r8;" + " movq 40(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 40(%1);" + " movq %%r9, 40(%2);" + + // cswap p1[6], p2[6] + " movq 48(%1), %%r8;" + " movq 48(%2), %%r9;" + " mov %%r8, %%r10;" + " 
cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 48(%1);" + " movq %%r9, 48(%2);" + + // cswap p1[7], p2[7] + " movq 56(%1), %%r8;" + " movq 56(%2), %%r9;" + " mov %%r8, %%r10;" + " cmovc %%r9, %%r8;" + " cmovc %%r10, %%r9;" + " movq %%r8, 56(%1);" + " movq %%r9, 56(%2);" + : "+&r"(bit) + : "r"(p1), "r"(p2) + : "%r8", "%r9", "%r10", "memory", "cc"); +} + +// Computes the square of a field element: out <- f * f +// Uses the 8-element buffer tmp for intermediate results +static inline void +fsqr(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ + __asm__ volatile( + + /////// Compute the raw multiplication: tmp <- f * f ////// + + // Step 1: Compute all partial products + " movq 0(%0), %%rdx;" // f[0] + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 24(%0), %%rdx;" // f[3] + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 16(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" 
+ " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + // Line up pointers + " mov %1, %0;" + " mov %2, %1;" + + /////// Wrap the result back into the field ////// + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc"); +} + +// Computes two field squarings: +// out[0] <- f[0] * f[0] +// out[1] <- f[1] * f[1] +// Uses the 16-element buffer tmp for intermediate results +static inline void +fsqr2(uint64_t *out, uint64_t *f, uint64_t *tmp) +{ + __asm__ volatile( + // Step 1: Compute all partial products + " movq 0(%0), %%rdx;" // f[0] + " mulxq 8(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 16(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 24(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 24(%0), %%rdx;" // f[3] + " mulxq 8(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 16(%0), 
%%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 8(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 16(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox %%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + " movq 0(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 0(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 8(%1);" + " movq 8(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 16(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 24(%1);" + " movq 16(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq %%r11, 32(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 40(%1);" + " movq 24(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 48(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 56(%1);" + + // Step 1: Compute all partial products + " movq 32(%0), %%rdx;" // f[0] + " mulxq 40(%0), %%r8, %%r14;" + " xor %%r15d, %%r15d;" // f[1]*f[0] + " mulxq 48(%0), %%r9, %%r10;" + " adcx %%r14, %%r9;" // f[2]*f[0] + " mulxq 56(%0), %%rax, %%rcx;" + " adcx %%rax, %%r10;" // f[3]*f[0] + " movq 56(%0), %%rdx;" // f[3] + " mulxq 40(%0), %%r11, %%rbx;" + " adcx %%rcx, %%r11;" // f[1]*f[3] + " mulxq 48(%0), %%rax, %%r13;" + " adcx %%rax, %%rbx;" // f[2]*f[3] + " movq 40(%0), %%rdx;" + " adcx %%r15, %%r13;" // f1 + " mulxq 48(%0), %%rax, %%rcx;" + " mov $0, %%r14;" // f[2]*f[1] + + // Step 2: Compute two parallel carry chains + " xor %%r15d, %%r15d;" + " adox %%rax, %%r10;" + " adcx %%r8, %%r8;" + " adox %%rcx, %%r11;" + " adcx %%r9, %%r9;" + " adox %%r15, %%rbx;" + " adcx %%r10, %%r10;" + " adox 
%%r15, %%r13;" + " adcx %%r11, %%r11;" + " adox %%r15, %%r14;" + " adcx %%rbx, %%rbx;" + " adcx %%r13, %%r13;" + " adcx %%r14, %%r14;" + + // Step 3: Compute intermediate squares + " movq 32(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[0]^2 + " movq %%rax, 64(%1);" + " add %%rcx, %%r8;" + " movq %%r8, 72(%1);" + " movq 40(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[1]^2 + " adcx %%rax, %%r9;" + " movq %%r9, 80(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 88(%1);" + " movq 48(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[2]^2 + " adcx %%rax, %%r11;" + " movq %%r11, 96(%1);" + " adcx %%rcx, %%rbx;" + " movq %%rbx, 104(%1);" + " movq 56(%0), %%rdx;" + " mulx %%rdx, %%rax, %%rcx;" // f[3]^2 + " adcx %%rax, %%r13;" + " movq %%r13, 112(%1);" + " adcx %%rcx, %%r14;" + " movq %%r14, 120(%1);" + + // Line up pointers + " mov %1, %0;" + " mov %2, %1;" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 32(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 0(%0), %%r8;" + " mulxq 40(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 8(%0), %%r9;" + " mulxq 48(%0), %%r10, %%r13;" + " adcx %%rbx, %%r10;" + " adoxq 16(%0), %%r10;" + " mulxq 56(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 24(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 8(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 16(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 24(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 0(%1);" + + // Step 1: Compute dst + carry == tmp_hi * 38 + tmp_lo + " mov $38, %%rdx;" + " mulxq 96(%0), %%r8, %%r13;" + " xor %%ecx, %%ecx;" + " adoxq 64(%0), %%r8;" + " mulxq 104(%0), %%r9, %%rbx;" + " adcx %%r13, %%r9;" + " adoxq 72(%0), %%r9;" + " mulxq 112(%0), %%r10, %%r13;" 
+ " adcx %%rbx, %%r10;" + " adoxq 80(%0), %%r10;" + " mulxq 120(%0), %%r11, %%rax;" + " adcx %%r13, %%r11;" + " adoxq 88(%0), %%r11;" + " adcx %%rcx, %%rax;" + " adox %%rcx, %%rax;" + " imul %%rdx, %%rax;" + + // Step 2: Fold the carry back into dst + " add %%rax, %%r8;" + " adcx %%rcx, %%r9;" + " movq %%r9, 40(%1);" + " adcx %%rcx, %%r10;" + " movq %%r10, 48(%1);" + " adcx %%rcx, %%r11;" + " movq %%r11, 56(%1);" + + // Step 3: Fold the carry bit back in; guaranteed not to carry at this point + " mov $0, %%rax;" + " cmovc %%rdx, %%rax;" + " add %%rax, %%r8;" + " movq %%r8, 32(%1);" + : "+&r"(f), "+&r"(tmp) + : "r"(out) + : "%rax", "%rbx", "%rcx", "%rdx", "%r8", "%r9", "%r10", "%r11", "%r13", "%r14", "%r15", "memory", "cc"); +} + +#endif /* defined(__x86_64__) || defined(_M_X64) */ +#endif /* __GNUC__ */ diff --git a/lib/freebl/verified/internal/Hacl_Bignum.h b/lib/freebl/verified/internal/Hacl_Bignum.h new file mode 100644 index 000000000..6080d3787 --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Bignum.h @@ -0,0 +1,312 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Bignum_H +#define __internal_Hacl_Bignum_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Krmllib.h" +#include "../Hacl_Bignum.h" +#include "lib_intrinsics.h" +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *b, + uint32_t *tmp, + uint32_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_mul_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *b, + uint64_t *tmp, + uint64_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint32( + uint32_t aLen, + uint32_t *a, + uint32_t *tmp, + uint32_t *res); + +void +Hacl_Bignum_Karatsuba_bn_karatsuba_sqr_uint64( + uint32_t aLen, + uint64_t *a, + uint64_t *tmp, + uint64_t *res); + +void +Hacl_Bignum_bn_add_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_bn_add_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_bn_sub_mod_n_u32( + uint32_t len1, + uint32_t *n, + uint32_t *a, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_bn_sub_mod_n_u64( + uint32_t len1, + uint64_t *n, + uint64_t *a, + uint64_t *b, + uint64_t *res); + +uint32_t Hacl_Bignum_ModInvLimb_mod_inv_uint32(uint32_t n0); + +uint64_t Hacl_Bignum_ModInvLimb_mod_inv_uint64(uint64_t n0); + +uint32_t Hacl_Bignum_Montgomery_bn_check_modulus_u32(uint32_t len, uint32_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *res); + +void 
+Hacl_Bignum_Montgomery_bn_mont_reduction_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *c, + uint32_t *res); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv, + uint32_t *r2, + uint32_t *a, + uint32_t *aM); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *a); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *bM, + uint32_t *resM); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u32( + uint32_t len, + uint32_t *n, + uint32_t nInv_u64, + uint32_t *aM, + uint32_t *resM); + +uint64_t Hacl_Bignum_Montgomery_bn_check_modulus_u64(uint32_t len, uint64_t *n); + +void +Hacl_Bignum_Montgomery_bn_precomp_r2_mod_n_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *res); + +void +Hacl_Bignum_Montgomery_bn_mont_reduction_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *c, + uint64_t *res); + +void +Hacl_Bignum_Montgomery_bn_to_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv, + uint64_t *r2, + uint64_t *a, + uint64_t *aM); + +void +Hacl_Bignum_Montgomery_bn_from_mont_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *a); + +void +Hacl_Bignum_Montgomery_bn_mont_mul_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *bM, + uint64_t *resM); + +void +Hacl_Bignum_Montgomery_bn_mont_sqr_u64( + uint32_t len, + uint64_t *n, + uint64_t nInv_u64, + uint64_t *aM, + uint64_t *resM); + +uint32_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u32( + uint32_t len, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +void 
+Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u32( + uint32_t len, + uint32_t *n, + uint32_t mu, + uint32_t *r2, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u32( + uint32_t len, + uint32_t nBits, + uint32_t *n, + uint32_t *a, + uint32_t bBits, + uint32_t *b, + uint32_t *res); + +uint64_t +Hacl_Bignum_Exponentiation_bn_check_mod_exp_u64( + uint32_t len, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_precomp_u64( + uint32_t len, + uint64_t *n, + uint64_t mu, + uint64_t *r2, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_vartime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +void +Hacl_Bignum_Exponentiation_bn_mod_exp_consttime_u64( + uint32_t len, + uint32_t nBits, + uint64_t *n, + uint64_t *a, + uint32_t bBits, + uint64_t *b, + uint64_t *res); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Bignum_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Chacha20.h b/lib/freebl/verified/internal/Hacl_Chacha20.h new file mode 100644 index 000000000..51ecfeef3 --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Chacha20.h @@ -0,0 +1,50 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), 
to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Chacha20_H +#define __internal_Hacl_Chacha20_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Chacha20.h" + +extern const uint32_t Hacl_Impl_Chacha20_Vec_chacha20_constants[4U]; + +void Hacl_Impl_Chacha20_chacha20_init(uint32_t *ctx, uint8_t *k, uint8_t *n, uint32_t ctr); + +void +Hacl_Impl_Chacha20_chacha20_update(uint32_t *ctx, uint32_t len, uint8_t *out, uint8_t *text); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Chacha20_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Curve25519_51.h b/lib/freebl/verified/internal/Hacl_Curve25519_51.h new file mode 100644 index 000000000..d7d05e89f --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Curve25519_51.h @@ -0,0 +1,53 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person 
obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef __internal_Hacl_Curve25519_51_H +#define __internal_Hacl_Curve25519_51_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "internal/Hacl_Krmllib.h" +#include "../Hacl_Curve25519_51.h" + +void +Hacl_Curve25519_51_fsquare_times( + uint64_t *o, + uint64_t *inp, + FStar_UInt128_uint128 *tmp, + uint32_t n); + +void Hacl_Curve25519_51_finv(uint64_t *o, uint64_t *i, FStar_UInt128_uint128 *tmp); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Curve25519_51_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Hash_SHA1.h b/lib/freebl/verified/internal/Hacl_Hash_SHA1.h new file mode 100644 index 000000000..02ee03247 --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Hash_SHA1.h @@ -0,0 +1,49 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Hash_SHA1_H +#define __internal_Hacl_Hash_SHA1_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Hash_SHA1.h" + +void Hacl_Hash_Core_SHA1_legacy_init(uint32_t *s); + +void Hacl_Hash_Core_SHA1_legacy_update(uint32_t *h, uint8_t *l); + +void Hacl_Hash_Core_SHA1_legacy_finish(uint32_t *s, uint8_t *dst); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_SHA1_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Hash_SHA2.h b/lib/freebl/verified/internal/Hacl_Hash_SHA2.h new file mode 100644 index 000000000..ed9894e71 --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Hash_SHA2.h @@ -0,0 +1,65 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Hash_SHA2_H +#define __internal_Hacl_Hash_SHA2_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Hash_SHA2.h" + +void Hacl_Hash_Core_SHA2_init_224(uint32_t *s); + +void Hacl_Hash_Core_SHA2_init_256(uint32_t *s); + +void Hacl_Hash_Core_SHA2_init_384(uint64_t *s); + +void Hacl_Hash_Core_SHA2_init_512(uint64_t *s); + +void Hacl_Hash_Core_SHA2_update_384(uint64_t *hash, uint8_t *block); + +void Hacl_Hash_Core_SHA2_update_512(uint64_t *hash, uint8_t *block); + +void Hacl_Hash_Core_SHA2_pad_256(uint64_t len, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_224(uint32_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_256(uint32_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_384(uint64_t *s, uint8_t *dst); + +void Hacl_Hash_Core_SHA2_finish_512(uint64_t *s, uint8_t *dst); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Hash_SHA2_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Krmllib.h b/lib/freebl/verified/internal/Hacl_Krmllib.h new file mode 100644 index 000000000..377843744 --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Krmllib.h @@ -0,0 +1,45 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies 
of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Krmllib_H +#define __internal_Hacl_Krmllib_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Krmllib.h" + +uint32_t LowStar_Vector_new_capacity(uint32_t cap); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Krmllib_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Poly1305_128.h b/lib/freebl/verified/internal/Hacl_Poly1305_128.h new file mode 100644 index 000000000..d5f257302 --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Poly1305_128.h @@ -0,0 +1,51 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The 
above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Poly1305_128_H +#define __internal_Hacl_Poly1305_128_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Poly1305_128.h" +#include "libintvector.h" +void +Hacl_Impl_Poly1305_Field32xN_128_load_acc2(Lib_IntVector_Intrinsics_vec128 *acc, uint8_t *b); + +void +Hacl_Impl_Poly1305_Field32xN_128_fmul_r2_normalize( + Lib_IntVector_Intrinsics_vec128 *out, + Lib_IntVector_Intrinsics_vec128 *p); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Poly1305_128_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Poly1305_256.h b/lib/freebl/verified/internal/Hacl_Poly1305_256.h new file mode 100644 index 000000000..9b1037923 --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Poly1305_256.h @@ -0,0 +1,51 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to 
whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Poly1305_256_H +#define __internal_Hacl_Poly1305_256_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Poly1305_256.h" +#include "libintvector.h" +void +Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b); + +void +Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( + Lib_IntVector_Intrinsics_vec256 *out, + Lib_IntVector_Intrinsics_vec256 *p); + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Poly1305_256_H_DEFINED +#endif diff --git a/lib/freebl/verified/internal/Hacl_Spec.h b/lib/freebl/verified/internal/Hacl_Spec.h new file mode 100644 index 000000000..cf5376aba --- /dev/null +++ b/lib/freebl/verified/internal/Hacl_Spec.h @@ -0,0 +1,59 @@ +/* MIT License + * + * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, 
sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef __internal_Hacl_Spec_H +#define __internal_Hacl_Spec_H + +#if defined(__cplusplus) +extern "C" { +#endif + +#include <string.h> +#include "krml/internal/types.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/target.h" + +#include "../Hacl_Spec.h" + +#define Spec_ECDSA_NoHash 0 +#define Spec_ECDSA_Hash 1 + +typedef uint8_t Spec_ECDSA_hash_alg_ecdsa_tags; + +typedef struct Spec_ECDSA_hash_alg_ecdsa_s { + Spec_ECDSA_hash_alg_ecdsa_tags tag; + Spec_Hash_Definitions_hash_alg _0; +} Spec_ECDSA_hash_alg_ecdsa; + +#define Spec_Cipher_Expansion_Hacl_CHACHA20 0 +#define Spec_Cipher_Expansion_Vale_AES128 1 +#define Spec_Cipher_Expansion_Vale_AES256 2 + +typedef uint8_t Spec_Cipher_Expansion_impl; + +#if defined(__cplusplus) +} +#endif + +#define __internal_Hacl_Spec_H_DEFINED +#endif diff --git a/lib/freebl/verified/karamel/include/krml/c_endianness.h b/lib/freebl/verified/karamel/include/krml/c_endianness.h new file mode 100644 index 000000000..21d7e1b4f --- /dev/null +++ b/lib/freebl/verified/karamel/include/krml/c_endianness.h @@ -0,0 +1,13 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. 
*/ + +#ifndef __KRML_ENDIAN_H +#define __KRML_ENDIAN_H + +#ifdef __GNUC__ +#warning "c_endianness.h is deprecated, include lowstar_endianness.h instead" +#endif + +#include "lowstar_endianness.h" + +#endif diff --git a/lib/freebl/verified/karamel/include/krml/fstar_int.h b/lib/freebl/verified/karamel/include/krml/fstar_int.h new file mode 100644 index 000000000..c7a5afb50 --- /dev/null +++ b/lib/freebl/verified/karamel/include/krml/fstar_int.h @@ -0,0 +1,89 @@ +#ifndef __FSTAR_INT_H +#define __FSTAR_INT_H + +#include "internal/types.h" + +/* + * Arithmetic Shift Right operator + * + * In all C standards, a >> b is implementation-defined when a has a signed + * type and a negative value. See e.g. 6.5.7 in + * http://www.open-std.org/jtc1/sc22/wg14/www/docs/n2310.pdf + * + * GCC, MSVC, and Clang implement a >> b as an arithmetic shift. + * + * GCC: https://gcc.gnu.org/onlinedocs/gcc-9.1.0/gcc/Integers-implementation.html#Integers-implementation + * MSVC: https://docs.microsoft.com/en-us/cpp/cpp/left-shift-and-right-shift-operators-input-and-output?view=vs-2019#right-shifts + * Clang: tested that Clang 7, 8 and 9 compile this to an arithmetic shift + * + * We implement arithmetic shift right simply as >> in these compilers + * and bail out in others. 
+ */ + +#if !(defined(_MSC_VER) || defined(__GNUC__) || (defined(__clang__) && (__clang_major__ >= 7))) + +static inline int8_t +FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int16_t +FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int32_t +FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +static inline int64_t +FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b) +{ + do { + KRML_HOST_EPRINTF("Could not identify compiler so could not provide an implementation of signed arithmetic shift right.\n"); + KRML_HOST_EXIT(255); + } while (0); +} + +#else + +static inline int8_t +FStar_Int8_shift_arithmetic_right(int8_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int16_t +FStar_Int16_shift_arithmetic_right(int16_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int32_t +FStar_Int32_shift_arithmetic_right(int32_t a, uint32_t b) +{ + return (a >> b); +} + +static inline int64_t +FStar_Int64_shift_arithmetic_right(int64_t a, uint32_t b) +{ + return (a >> b); +} + +#endif /* !(defined(_MSC_VER) ... ) */ + +#endif /* __FSTAR_INT_H */ diff --git a/lib/freebl/verified/karamel/include/krml/internal/builtin.h b/lib/freebl/verified/karamel/include/krml/internal/builtin.h new file mode 100644 index 000000000..f55e5f824 --- /dev/null +++ b/lib/freebl/verified/karamel/include/krml/internal/builtin.h @@ -0,0 +1,16 @@ +/* Copyright (c) INRIA and Microsoft Corporation. 
All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_BUILTIN_H +#define __KRML_BUILTIN_H + +/* For alloca, when using KaRaMeL's -falloca */ +#if (defined(_WIN32) || defined(_WIN64)) +#include <malloc.h> +#endif + +/* If some globals need to be initialized before the main, then karamel will + * generate and try to link last a function with this type: */ +void krmlinit_globals(void); + +#endif diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h b/lib/freebl/verified/karamel/include/krml/internal/callconv.h index 8278b157d..0d250c445 100644 --- a/lib/freebl/verified/kremlin/include/kremlin/internal/callconv.h +++ b/lib/freebl/verified/karamel/include/krml/internal/callconv.h @@ -1,8 +1,8 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. Licensed under the Apache 2.0 License. */ -#ifndef __KREMLIN_CALLCONV_H -#define __KREMLIN_CALLCONV_H +#ifndef __KRML_CALLCONV_H +#define __KRML_CALLCONV_H /******************************************************************************/ /* Some macros to ease compatibility */ @@ -24,7 +24,7 @@ #endif #endif -/* Since KreMLin emits the inline keyword unconditionally, we follow the +/* Since KaRaMeL emits the inline keyword unconditionally, we follow the * guidelines at https://gcc.gnu.org/onlinedocs/gcc/Inline.html and make this * __inline__ to ensure the code compiles with -std=c90 and earlier. 
*/ #ifdef __GNUC__ diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h b/lib/freebl/verified/karamel/include/krml/internal/compat.h index 964d1c52a..964d1c52a 100644 --- a/lib/freebl/verified/kremlin/include/kremlin/internal/compat.h +++ b/lib/freebl/verified/karamel/include/krml/internal/compat.h diff --git a/lib/freebl/verified/karamel/include/krml/internal/debug.h b/lib/freebl/verified/karamel/include/krml/internal/debug.h new file mode 100644 index 000000000..f70006bd3 --- /dev/null +++ b/lib/freebl/verified/karamel/include/krml/internal/debug.h @@ -0,0 +1,57 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_DEBUG_H +#define __KRML_DEBUG_H + +#include <inttypes.h> + +#include "krml/internal/target.h" + +/******************************************************************************/ +/* Debugging helpers - intended only for KaRaMeL developers */ +/******************************************************************************/ + +/* In support of "-wasm -d force-c": we might need this function to be + * forward-declared, because the dependency on WasmSupport appears very late, + * after SimplifyWasm, and sadly, after the topological order has been done. */ +void WasmSupport_check_buffer_size(uint32_t s); + +/* A series of GCC atrocities to trace function calls (karamel's [-d c-calls] + * option). Useful when trying to debug, say, Wasm, to compare traces. 
*/ +/* clang-format off */ +#ifdef __GNUC__ +#define KRML_FORMAT(X) _Generic((X), \ + uint8_t : "0x%08" PRIx8, \ + uint16_t: "0x%08" PRIx16, \ + uint32_t: "0x%08" PRIx32, \ + uint64_t: "0x%08" PRIx64, \ + int8_t : "0x%08" PRIx8, \ + int16_t : "0x%08" PRIx16, \ + int32_t : "0x%08" PRIx32, \ + int64_t : "0x%08" PRIx64, \ + default : "%s") + +#define KRML_FORMAT_ARG(X) _Generic((X), \ + uint8_t : X, \ + uint16_t: X, \ + uint32_t: X, \ + uint64_t: X, \ + int8_t : X, \ + int16_t : X, \ + int32_t : X, \ + int64_t : X, \ + default : "unknown") +/* clang-format on */ + +#define KRML_DEBUG_RETURN(X) \ + ({ \ + __auto_type _ret = (X); \ + KRML_HOST_PRINTF("returning: "); \ + KRML_HOST_PRINTF(KRML_FORMAT(_ret), KRML_FORMAT_ARG(_ret)); \ + KRML_HOST_PRINTF(" \n"); \ + _ret; \ + }) +#endif + +#endif diff --git a/lib/freebl/verified/karamel/include/krml/internal/target.h b/lib/freebl/verified/karamel/include/krml/internal/target.h new file mode 100644 index 000000000..929abe808 --- /dev/null +++ b/lib/freebl/verified/karamel/include/krml/internal/target.h @@ -0,0 +1,333 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +#ifndef __KRML_TARGET_H +#define __KRML_TARGET_H + +#include <stdlib.h> +#include <stdio.h> +#include <stdbool.h> +#include <inttypes.h> +#include <limits.h> + +#include "krml/internal/callconv.h" + +/******************************************************************************/ +/* Macros that KaRaMeL will generate. */ +/******************************************************************************/ + +/* For "bare" targets that do not have a C stdlib, the user might want to use + * [-add-early-include '"mydefinitions.h"'] and override these. */ +#ifndef KRML_HOST_PRINTF +#define KRML_HOST_PRINTF printf +#endif + +#if ( \ + (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ + (!(defined KRML_HOST_EPRINTF))) +#define KRML_HOST_EPRINTF(...) 
fprintf(stderr, __VA_ARGS__) +#elif !(defined KRML_HOST_EPRINTF) && defined(_MSC_VER) +#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) +#endif + +#ifndef KRML_HOST_EXIT +#define KRML_HOST_EXIT exit +#endif + +#ifndef KRML_HOST_MALLOC +#define KRML_HOST_MALLOC malloc +#endif + +#ifndef KRML_HOST_CALLOC +#define KRML_HOST_CALLOC calloc +#endif + +#ifndef KRML_HOST_FREE +#define KRML_HOST_FREE free +#endif + +#ifndef KRML_PRE_ALIGN +#ifdef _MSC_VER +#define KRML_PRE_ALIGN(X) __declspec(align(X)) +#else +#define KRML_PRE_ALIGN(X) +#endif +#endif + +#ifndef KRML_POST_ALIGN +#ifdef _MSC_VER +#define KRML_POST_ALIGN(X) +#else +#define KRML_POST_ALIGN(X) __attribute__((aligned(X))) +#endif +#endif + +#ifndef KRML_ALIGNED_MALLOC +#ifdef _MSC_VER +#define KRML_ALIGNED_MALLOC(X, Y) _aligned_malloc(Y, X) +#else +#define KRML_ALIGNED_MALLOC(X, Y) aligned_alloc(X, Y) +#endif +#endif + +#ifndef KRML_ALIGNED_FREE +#ifdef _MSC_VER +#define KRML_ALIGNED_FREE(X) _aligned_free(X) +#else +#define KRML_ALIGNED_FREE(X) free(X) +#endif +#endif + +#ifndef KRML_HOST_TIME + +#include <time.h> + +/* Prims_nat not yet in scope */ +inline static int32_t +krml_time() +{ + return (int32_t)time(NULL); +} + +#define KRML_HOST_TIME krml_time +#endif + +/* In statement position, exiting is easy. */ +#define KRML_EXIT \ + do { \ + KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ + KRML_HOST_EXIT(254); \ + } while (0) + +/* In expression position, use the comma-operator and a malloc to return an + * expression of the right size. KaRaMeL passes t as the parameter to the macro. + */ +#define KRML_EABORT(t, msg) \ + (KRML_HOST_PRINTF("KaRaMeL abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \ + KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t)))) + +/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of + * *elements*. Do an ugly, run-time check (some of which KaRaMeL can eliminate). 
+ */ + +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) +#define _KRML_CHECK_SIZE_PRAGMA \ + _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") +#else +#define _KRML_CHECK_SIZE_PRAGMA +#endif + +#define KRML_CHECK_SIZE(size_elt, sz) \ + do { \ + _KRML_CHECK_SIZE_PRAGMA \ + if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ + KRML_HOST_PRINTF( \ + "Maximum allocatable size exceeded, aborting before overflow at " \ + "%s:%d\n", \ + __FILE__, __LINE__); \ + KRML_HOST_EXIT(253); \ + } \ + } while (0) + +#if defined(_MSC_VER) && _MSC_VER < 1900 +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) _snprintf_s(buf, sz, _TRUNCATE, fmt, arg) +#else +#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg) +#endif + +#if defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 4)) +#define KRML_DEPRECATED(x) __attribute__((deprecated(x))) +#elif defined(__GNUC__) +/* deprecated attribute is not defined in GCC < 4.5. */ +#define KRML_DEPRECATED(x) +#elif defined(_MSC_VER) +#define KRML_DEPRECATED(x) __declspec(deprecated(x)) +#endif + +/* Macros for prettier unrolling of loops */ +#define KRML_LOOP1(i, n, x) \ + { \ + x \ + i += n; \ + } + +#define KRML_LOOP2(i, n, x) \ + KRML_LOOP1(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP3(i, n, x) \ + KRML_LOOP2(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP4(i, n, x) \ + KRML_LOOP2(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP5(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP6(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP7(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP3(i, n, x) + +#define KRML_LOOP8(i, n, x) \ + KRML_LOOP4(i, n, x) \ + KRML_LOOP4(i, n, x) + +#define KRML_LOOP9(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP1(i, n, x) + +#define KRML_LOOP10(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP2(i, n, x) + +#define KRML_LOOP11(i, n, x) \ + KRML_LOOP8(i, n, x) \ + 
KRML_LOOP3(i, n, x) + +#define KRML_LOOP12(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP4(i, n, x) + +#define KRML_LOOP13(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP5(i, n, x) + +#define KRML_LOOP14(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP6(i, n, x) + +#define KRML_LOOP15(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP7(i, n, x) + +#define KRML_LOOP16(i, n, x) \ + KRML_LOOP8(i, n, x) \ + KRML_LOOP8(i, n, x) + +#define KRML_UNROLL_FOR(i, z, n, k, x) \ + do { \ + uint32_t i = z; \ + KRML_LOOP##n(i, k, x) \ + } while (0) + +#define KRML_ACTUAL_FOR(i, z, n, k, x) \ + do { \ + for (uint32_t i = z; i < n; i += k) { \ + x \ + } \ + } while (0) + +#ifndef KRML_UNROLL_MAX +#define KRML_UNROLL_MAX 16 +#endif + +/* 1 is the number of loop iterations, i.e. (n - z)/k as evaluated by krml */ +#if 0 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR0(i, z, n, k, x) +#else +#define KRML_MAYBE_FOR0(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 1 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 1, k, x) +#else +#define KRML_MAYBE_FOR1(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 2 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 2, k, x) +#else +#define KRML_MAYBE_FOR2(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 3 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 3, k, x) +#else +#define KRML_MAYBE_FOR3(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 4 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 4, k, x) +#else +#define KRML_MAYBE_FOR4(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 5 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 5, k, x) +#else +#define KRML_MAYBE_FOR5(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 6 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR6(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 6, k, x) +#else +#define 
KRML_MAYBE_FOR6(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 7 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 7, k, x) +#else +#define KRML_MAYBE_FOR7(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 8 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 8, k, x) +#else +#define KRML_MAYBE_FOR8(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 9 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 9, k, x) +#else +#define KRML_MAYBE_FOR9(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 10 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR10(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 10, k, x) +#else +#define KRML_MAYBE_FOR10(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 11 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 11, k, x) +#else +#define KRML_MAYBE_FOR11(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 12 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 12, k, x) +#else +#define KRML_MAYBE_FOR12(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 13 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 13, k, x) +#else +#define KRML_MAYBE_FOR13(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 14 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 14, k, x) +#else +#define KRML_MAYBE_FOR14(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 15 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 15, k, x) +#else +#define KRML_MAYBE_FOR15(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif + +#if 16 <= KRML_UNROLL_MAX +#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_UNROLL_FOR(i, z, 16, k, x) +#else +#define KRML_MAYBE_FOR16(i, z, n, k, x) KRML_ACTUAL_FOR(i, z, n, k, x) +#endif +#endif diff --git 
a/lib/freebl/verified/kremlin/include/kremlin/internal/types.h b/lib/freebl/verified/karamel/include/krml/internal/types.h index 2c966cb54..2cf1887ad 100644 --- a/lib/freebl/verified/kremlin/include/kremlin/internal/types.h +++ b/lib/freebl/verified/karamel/include/krml/internal/types.h @@ -22,7 +22,7 @@ typedef int16_t FStar_Int16_t, FStar_Int16_t_; typedef uint8_t FStar_UInt8_t, FStar_UInt8_t_; typedef int8_t FStar_Int8_t, FStar_Int8_t_; -/* Only useful when building Kremlib, because it's in the dependency graph of +/* Only useful when building krmllib, because it's in the dependency graph of * FStar.Int.Cast. */ typedef uint64_t FStar_UInt63_t, FStar_UInt63_t_; typedef int64_t FStar_Int63_t, FStar_Int63_t_; @@ -85,7 +85,7 @@ typedef struct FStar_UInt128_uint128_s { * latter is for internal use. */ typedef FStar_UInt128_uint128 FStar_UInt128_t, uint128_t; -#include "kremlin/lowstar_endianness.h" +#include "krml/lowstar_endianness.h" #endif diff --git a/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h b/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h new file mode 100644 index 000000000..b44fa3f75 --- /dev/null +++ b/lib/freebl/verified/karamel/include/krml/internal/wasmsupport.h @@ -0,0 +1,5 @@ +/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. + Licensed under the Apache 2.0 License. */ + +/* This file is automatically included when compiling with -wasm -d force-c */ +#define WasmSupport_check_buffer_size(X) diff --git a/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h b/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h index 2a13cc9f1..48e9fd579 100644 --- a/lib/freebl/verified/kremlin/include/kremlin/lowstar_endianness.h +++ b/lib/freebl/verified/karamel/include/krml/lowstar_endianness.h @@ -12,7 +12,7 @@ /******************************************************************************/ /* ... 
for Linux */ -#if defined(__linux__) || defined(__CYGWIN__) || defined(__USE_SYSTEM_ENDIAN_H__) +#if defined(__linux__) || defined(__CYGWIN__) || defined(__USE_SYSTEM_ENDIAN_H__) || defined(__GLIBC__) #include <endian.h> /* ... for OSX */ diff --git a/lib/freebl/verified/karamel/include/krmllib.h b/lib/freebl/verified/karamel/include/krmllib.h new file mode 100644 index 000000000..1f461f351 --- /dev/null +++ b/lib/freebl/verified/karamel/include/krmllib.h @@ -0,0 +1,28 @@ +#ifndef __KRMLLIB_H +#define __KRMLLIB_H + +/******************************************************************************/ +/* The all-in-one krmllib.h header */ +/******************************************************************************/ + +/* This is a meta-header that is included by default in KaRaMeL generated + * programs. If you wish to have a more lightweight set of headers, or are + * targeting an environment where controlling these macros yourself is + * important, consider using: + * + * krml -minimal + * + * to disable the inclusion of this file (note: this also disables the default + * argument "-bundle FStar.*"). You can then include the headers of your choice + * one by one, using -add-early-include. 
*/ + +#include "krml/internal/target.h" +#include "krml/internal/callconv.h" +#include "krml/internal/builtin.h" +#include "krml/internal/debug.h" +#include "krml/internal/types.h" + +#include "krml/lowstar_endianness.h" +#include "krml/fstar_int.h" + +#endif /* __KRMLLIB_H */ diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h b/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h index 57b9b7156..4affcee35 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128.h +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128.h @@ -5,13 +5,13 @@ #ifndef __FStar_UInt128_H #define __FStar_UInt128_H + #include <inttypes.h> #include <stdbool.h> -#include "kremlin/internal/compat.h" -#include "kremlin/lowstar_endianness.h" -#include "kremlin/internal/types.h" -#include "kremlin/internal/target.h" - +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" static inline FStar_UInt128_uint128 FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b); diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h b/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h index a5de03751..8f235c314 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt128_Verified.h +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt128_Verified.h @@ -5,13 +5,12 @@ #ifndef __FStar_UInt128_Verified_H #define __FStar_UInt128_Verified_H -#include <inttypes.h> -#include <stdbool.h> -#include "kremlin/internal/types.h" -#include "kremlin/internal/target.h" #include "FStar_UInt_8_16_32_64.h" - +#include <inttypes.h> +#include <stdbool.h> +#include "krml/internal/types.h" +#include "krml/internal/target.h" static inline uint64_t FStar_UInt128_constant_time_carry(uint64_t a, uint64_t b) { diff --git 
a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h b/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h index 08884599c..51f3eead1 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/FStar_UInt_8_16_32_64.h +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/FStar_UInt_8_16_32_64.h @@ -5,13 +5,13 @@ #ifndef __FStar_UInt_8_16_32_64_H #define __FStar_UInt_8_16_32_64_H + #include <inttypes.h> #include <stdbool.h> -#include "kremlin/internal/compat.h" -#include "kremlin/lowstar_endianness.h" -#include "kremlin/internal/types.h" -#include "kremlin/internal/target.h" - +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" extern Prims_int FStar_UInt64_n; extern bool FStar_UInt64_uu___is_Mk(uint64_t projectee); @@ -22,6 +22,10 @@ extern Prims_int FStar_UInt64_v(uint64_t x); extern uint64_t FStar_UInt64_uint_to_t(Prims_int x); +extern uint64_t FStar_UInt64_zero; + +extern uint64_t FStar_UInt64_one; + extern uint64_t FStar_UInt64_minus(uint64_t a); extern uint32_t FStar_UInt64_n_minus_one; @@ -68,6 +72,10 @@ extern Prims_int FStar_UInt32_v(uint32_t x); extern uint32_t FStar_UInt32_uint_to_t(Prims_int x); +extern uint32_t FStar_UInt32_zero; + +extern uint32_t FStar_UInt32_one; + extern uint32_t FStar_UInt32_minus(uint32_t a); extern uint32_t FStar_UInt32_n_minus_one; @@ -114,6 +122,10 @@ extern Prims_int FStar_UInt16_v(uint16_t x); extern uint16_t FStar_UInt16_uint_to_t(Prims_int x); +extern uint16_t FStar_UInt16_zero; + +extern uint16_t FStar_UInt16_one; + extern uint16_t FStar_UInt16_minus(uint16_t a); extern uint32_t FStar_UInt16_n_minus_one; @@ -160,6 +172,10 @@ extern Prims_int FStar_UInt8_v(uint8_t x); extern uint8_t FStar_UInt8_uint_to_t(Prims_int x); +extern uint8_t FStar_UInt8_zero; + +extern uint8_t FStar_UInt8_one; + extern uint8_t FStar_UInt8_minus(uint8_t a); extern uint32_t FStar_UInt8_n_minus_one; diff --git 
a/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h b/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h index 6d86cd584..5feb077a4 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/LowStar_Endianness.h +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/LowStar_Endianness.h @@ -5,13 +5,14 @@ #ifndef __LowStar_Endianness_H #define __LowStar_Endianness_H + +#include "FStar_UInt128.h" #include <inttypes.h> #include <stdbool.h> -#include "kremlin/internal/compat.h" -#include "kremlin/lowstar_endianness.h" -#include "kremlin/internal/types.h" -#include "kremlin/internal/target.h" - +#include "krml/internal/compat.h" +#include "krml/lowstar_endianness.h" +#include "krml/internal/types.h" +#include "krml/internal/target.h" static inline void store128_le(uint8_t *x0, FStar_UInt128_uint128 x1); static inline FStar_UInt128_uint128 load128_le(uint8_t *x0); diff --git a/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic b/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic new file mode 100644 index 000000000..672b58015 --- /dev/null +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.basic @@ -0,0 +1,56 @@ +# A basic Makefile that KaRaMeL copies in the output directory; this is not +# guaranteed to work and will only work well for very simple projects. This +# Makefile uses: +# - the custom C files passed to your krml invocation +# - the custom C flags passed to your krml invocation +# - the -o option passed to your krml invocation + +include Makefile.include + +ifeq (,$(KRML_HOME)) + $(error please define KRML_HOME to point to the root of your KaRaMeL git checkout) +endif + +CFLAGS += -I. 
-I $(KRML_HOME)/include -I $(KRML_HOME)/krmllib/dist/minimal +CFLAGS += -Wall -Wextra -Werror -std=c11 -Wno-unused-variable \ + -Wno-unknown-warning-option -Wno-unused-but-set-variable -Wno-unused-function \ + -Wno-unused-parameter -Wno-infinite-recursion \ + -g -fwrapv -D_BSD_SOURCE -D_DEFAULT_SOURCE +ifeq ($(OS),Windows_NT) +CFLAGS += -D__USE_MINGW_ANSI_STDIO +else +CFLAGS += -fPIC +endif +CFLAGS += $(USER_CFLAGS) + +SOURCES += $(ALL_C_FILES) $(USER_C_FILES) +ifneq (,$(BLACKLIST)) + SOURCES := $(filter-out $(BLACKLIST),$(SOURCES)) +endif +OBJS += $(patsubst %.c,%.o,$(SOURCES)) + +all: $(USER_TARGET) + +$(USER_TARGET): $(OBJS) + +AR ?= ar + +%.a: + $(AR) cr $@ $^ + +%.exe: + $(CC) $(CFLAGS) -o $@ $^ $(KRML_HOME)/krmllib/dist/generic/libkrmllib.a + +%.so: + $(CC) $(CFLAGS) -shared -o $@ $^ + +%.d: %.c + @set -e; rm -f $@; \ + $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ + sed 's,\($(notdir $*)\)\.o[ :]*,$(dir $@)\1.o $@ : ,g' < $@.$$$$ > $@; \ + rm -f $@.$$$$ + +include $(patsubst %.c,%.d,$(SOURCES)) + +clean: + rm -rf *.o *.d $(USER_TARGET) diff --git a/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include b/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include new file mode 100644 index 000000000..ad5321718 --- /dev/null +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/Makefile.include @@ -0,0 +1,5 @@ +USER_TARGET=libkrmllib.a +USER_CFLAGS= +USER_C_FILES=fstar_uint128.c +ALL_C_FILES= +ALL_H_FILES=FStar_UInt128.h FStar_UInt_8_16_32_64.h LowStar_Endianness.h diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h b/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h index 441928def..33cff6b6d 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_gcc64.h +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_gcc64.h @@ -16,7 +16,7 @@ * FStar.UInt128 to avoid a maze of preprocessor guards and hand-written code. 
* */ -/* This file is used for both the minimal and generic kremlib distributions. As +/* This file is used for both the minimal and generic krmllib distributions. As * such, it assumes that the machine integers have been bundled the exact same * way in both cases. */ diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h b/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h index 5969ff028..e9b366e25 100644 --- a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_msvc.h +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_msvc.h @@ -1,17 +1,17 @@ /* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. Licensed under the Apache 2.0 License. */ -/* This file was generated by KreMLin <https://github.com/FStarLang/kremlin> - * then hand-edited to use MSVC intrinsics KreMLin invocation: - * C:\users\barrybo\mitls2c\kremlin\_build\src\Kremlin.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "kremlib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims +/* This file was generated by KaRaMeL <https://github.com/FStarLang/karamel> + * then hand-edited to use MSVC intrinsics KaRaMeL invocation: + * C:\users\barrybo\mitls2c\karamel\_build\src\Karamel.native -minimal -fnouint128 C:/users/barrybo/mitls2c/FStar/ulib/FStar.UInt128.fst -tmpdir ../secure_api/out/runtime_switch/uint128 -skip-compilation -add-include "krmllib0.h" -drop FStar.Int.Cast.Full -bundle FStar.UInt128=FStar.*,Prims * F* version: 15104ff8 - * KreMLin version: 318b7fa8 + * KaRaMeL version: 318b7fa8 */ #ifndef FSTAR_UINT128_MSVC #define FSTAR_UINT128_MSVC -#include "kremlin/internal/types.h" +#include "krml/internal/types.h" #include "FStar_UInt128.h" #include "FStar_UInt_8_16_32_64.h" @@ -44,8 +44,10 @@ load128_le(uint8_t *b) #if HAS_OPTIMIZED return _mm_loadu_si128((__m128i *)b); #else - return ( 
- (FStar_UInt128_uint128){ .low = load64_le(b), .high = load64_le(b + 8) }); + FStar_UInt128_uint128 lit; + lit.low = load64_le(b); + lit.high = load64_le(b + 8); + return lit; #endif } @@ -64,7 +66,10 @@ load128_be(uint8_t *b) #if HAS_OPTIMIZED return _mm_set_epi64x(h, l); #else - return ((FStar_UInt128_uint128){ .low = l, .high = h }); + FStar_UInt128_uint128 lit; + lit.low = l; + lit.high = h; + return lit; #endif } @@ -98,9 +103,10 @@ FStar_UInt128_add(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) _addcarry_u64(carry, a.high, b.high, &h); // high = a.high+b.high+CF return _mm_set_epi64x(h, l); #else - return ((FStar_UInt128_uint128){ - .low = a.low + b.low, - .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; #endif } @@ -110,9 +116,10 @@ FStar_UInt128_add_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) #if HAS_OPTIMIZED return FStar_UInt128_add(a, b); #else - return ((FStar_UInt128_uint128){ - .low = a.low + b.low, - .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; #endif } @@ -122,9 +129,10 @@ FStar_UInt128_add_mod(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) #if HAS_OPTIMIZED return FStar_UInt128_add(a, b); #else - return ((FStar_UInt128_uint128){ - .low = a.low + b.low, - .high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low) }); + FStar_UInt128_uint128 lit; + lit.low = a.low + b.low; + lit.high = a.high + b.high + FStar_UInt128_carry(a.low + b.low, b.low); + return lit; #endif } @@ -138,9 +146,10 @@ FStar_UInt128_sub(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) _subborrow_u64(borrow, a.high, b.high, &h); return _mm_set_epi64x(h, l); #else - return ((FStar_UInt128_uint128){ - .low = a.low -
b.low, - .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; #endif } @@ -150,18 +159,20 @@ FStar_UInt128_sub_underspec(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) #if HAS_OPTIMIZED return FStar_UInt128_sub(a, b); #else - return ((FStar_UInt128_uint128){ - .low = a.low - b.low, - .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; #endif } inline static FStar_UInt128_uint128 FStar_UInt128_sub_mod_impl(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) { - return ((FStar_UInt128_uint128){ - .low = a.low - b.low, - .high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low) }); + FStar_UInt128_uint128 lit; + lit.low = a.low - b.low; + lit.high = a.high - b.high - FStar_UInt128_carry(a.low, a.low - b.low); + return lit; } inline static FStar_UInt128_uint128 @@ -180,8 +191,10 @@ FStar_UInt128_logand(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) #if HAS_OPTIMIZED return _mm_and_si128(a, b); #else - return ( - (FStar_UInt128_uint128){ .low = a.low & b.low, .high = a.high & b.high }); + FStar_UInt128_uint128 lit; + lit.low = a.low & b.low; + lit.high = a.high & b.high; + return lit; #endif } @@ -191,8 +204,10 @@ FStar_UInt128_logxor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) #if HAS_OPTIMIZED return _mm_xor_si128(a, b); #else - return ( - (FStar_UInt128_uint128){ .low = a.low ^ b.low, .high = a.high ^ b.high }); + FStar_UInt128_uint128 lit; + lit.low = a.low ^ b.low; + lit.high = a.high ^ b.high; + return lit; #endif } @@ -202,8 +217,10 @@ FStar_UInt128_logor(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) #if HAS_OPTIMIZED return _mm_or_si128(a, b); #else - return ( - (FStar_UInt128_uint128){ .low = a.low | b.low, .high = 
a.high | b.high }); + FStar_UInt128_uint128 lit; + lit.low = a.low | b.low; + lit.high = a.high | b.high; + return lit; #endif } @@ -213,7 +230,10 @@ FStar_UInt128_lognot(FStar_UInt128_uint128 a) #if HAS_OPTIMIZED return _mm_andnot_si128(a, a); #else - return ((FStar_UInt128_uint128){ .low = ~a.low, .high = ~a.high }); + FStar_UInt128_uint128 lit; + lit.low = ~a.low; + lit.high = ~a.high; + return lit; #endif } @@ -236,17 +256,21 @@ FStar_UInt128_shift_left_small(FStar_UInt128_uint128 a, uint32_t s) { if (s == (uint32_t)0U) return a; - else - return ((FStar_UInt128_uint128){ - .low = a.low << s, - .high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s) }); + else { + FStar_UInt128_uint128 lit; + lit.low = a.low << s; + lit.high = FStar_UInt128_add_u64_shift_left_respec(a.high, a.low, s); + return lit; + } } inline static FStar_UInt128_uint128 FStar_UInt128_shift_left_large(FStar_UInt128_uint128 a, uint32_t s) { - return ((FStar_UInt128_uint128){ .low = (uint64_t)0U, - .high = a.low << s - FStar_UInt128_u32_64 }); + FStar_UInt128_uint128 lit; + lit.low = (uint64_t)0U; + lit.high = a.low << s - FStar_UInt128_u32_64; + return lit; } inline static FStar_UInt128_uint128 @@ -287,17 +311,21 @@ FStar_UInt128_shift_right_small(FStar_UInt128_uint128 a, uint32_t s) { if (s == (uint32_t)0U) return a; - else - return ((FStar_UInt128_uint128){ - .low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s), - .high = a.high >> s }); + else { + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_add_u64_shift_right_respec(a.high, a.low, s); + lit.high = a.high >> s; + return lit; + } } inline static FStar_UInt128_uint128 FStar_UInt128_shift_right_large(FStar_UInt128_uint128 a, uint32_t s) { - return ((FStar_UInt128_uint128){ .low = a.high >> s - FStar_UInt128_u32_64, - .high = (uint64_t)0U }); + FStar_UInt128_uint128 lit; + lit.low = a.high >> s - FStar_UInt128_u32_64; + lit.high = (uint64_t)0U; + return lit; } inline static FStar_UInt128_uint128 @@ -367,11 +395,10 
@@ FStar_UInt128_eq_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) // And them together return _mm_and_si128(ret64, s64); #else - return ( - (FStar_UInt128_uint128){ .low = FStar_UInt64_eq_mask(a.low, b.low) & - FStar_UInt64_eq_mask(a.high, b.high), - .high = FStar_UInt64_eq_mask(a.low, b.low) & - FStar_UInt64_eq_mask(a.high, b.high) }); + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + lit.high = FStar_UInt64_eq_mask(a.low, b.low) & FStar_UInt64_eq_mask(a.high, b.high); + return lit; #endif } @@ -408,15 +435,16 @@ FStar_UInt128_gte_mask(FStar_UInt128_uint128 a, FStar_UInt128_uint128 b) ret, _MM_SHUFFLE(0, 0, 0, 0)); // the result is in 0. Shuffle into all dwords. #else - return ((FStar_UInt128_uint128){ - .low = FStar_UInt64_gte_mask(a.high, b.high) & + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt64_gte_mask(a.high, b.high) & + ~FStar_UInt64_eq_mask(a.high, b.high) | + FStar_UInt64_eq_mask(a.high, b.high) & + FStar_UInt64_gte_mask(a.low, b.low); + lit.high = FStar_UInt64_gte_mask(a.high, b.high) & ~FStar_UInt64_eq_mask(a.high, b.high) | FStar_UInt64_eq_mask(a.high, b.high) & - FStar_UInt64_gte_mask(a.low, b.low), - .high = FStar_UInt64_gte_mask(a.high, b.high) & - ~FStar_UInt64_eq_mask(a.high, b.high) | - FStar_UInt64_eq_mask(a.high, b.high) & - FStar_UInt64_gte_mask(a.low, b.low) }); + FStar_UInt64_gte_mask(a.low, b.low); + return lit; #endif } @@ -426,7 +454,10 @@ FStar_UInt128_uint64_to_uint128(uint64_t a) #if HAS_OPTIMIZED return _mm_set_epi64x(0, a); #else - return ((FStar_UInt128_uint128){ .low = a, .high = (uint64_t)0U }); + FStar_UInt128_uint128 lit; + lit.low = a; + lit.high = (uint64_t)0U; + return lit; #endif } @@ -458,23 +489,24 @@ FStar_UInt128_mul32(uint64_t x, uint32_t y) l = _umul128(x, (uint64_t)y, &h); return _mm_set_epi64x(h, l); #else - return ((FStar_UInt128_uint128){ - .low = FStar_UInt128_u32_combine( - (x >> FStar_UInt128_u32_32) * (uint64_t)y + - 
(FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> - FStar_UInt128_u32_32), - FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)), - .high = (x >> FStar_UInt128_u32_32) * (uint64_t)y + - (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> - FStar_UInt128_u32_32) >> - FStar_UInt128_u32_32 }); + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine( + (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32), + FStar_UInt128_u64_mod_32(FStar_UInt128_u64_mod_32(x) * (uint64_t)y)); + lit.high = (x >> FStar_UInt128_u32_32) * (uint64_t)y + + (FStar_UInt128_u64_mod_32(x) * (uint64_t)y >> + FStar_UInt128_u32_32) >> + FStar_UInt128_u32_32; + return lit; #endif } /* Note: static headers bring scope collision issues when they define types! - * Because now client (kremlin-generated) code will include this header and + * Because now client (karamel-generated) code will include this header and * there might be type collisions if the client code uses quadruples of uint64s. - * So, we cannot use the kremlin-generated name. */ + * So, we cannot use the karamel-generated name. 
*/ typedef struct K_quad_s { uint64_t fst; uint64_t snd; @@ -485,14 +517,15 @@ typedef struct K_quad_s { inline static K_quad FStar_UInt128_mul_wide_impl_t_(uint64_t x, uint64_t y) { - return ((K_quad){ - .fst = FStar_UInt128_u64_mod_32(x), - .snd = FStar_UInt128_u64_mod_32( - FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)), - .thd = x >> FStar_UInt128_u32_32, - .f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + - (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> - FStar_UInt128_u32_32) }); + K_quad tmp; + tmp.fst = FStar_UInt128_u64_mod_32(x); + tmp.snd = FStar_UInt128_u64_mod_32( + FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y)); + tmp.thd = x >> FStar_UInt128_u32_32; + tmp.f3 = (x >> FStar_UInt128_u32_32) * FStar_UInt128_u64_mod_32(y) + + (FStar_UInt128_u64_mod_32(x) * FStar_UInt128_u64_mod_32(y) >> + FStar_UInt128_u32_32); + return tmp; } static uint64_t @@ -510,13 +543,14 @@ FStar_UInt128_mul_wide_impl(uint64_t x, uint64_t y) uint64_t w3 = scrut.snd; uint64_t x_ = scrut.thd; uint64_t t_ = scrut.f3; - return ((FStar_UInt128_uint128){ - .low = FStar_UInt128_u32_combine_( - u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3), - .high = - x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + - (u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_) >> - FStar_UInt128_u32_32) }); + FStar_UInt128_uint128 lit; + lit.low = FStar_UInt128_u32_combine_( + u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_), w3); + lit.high = + x_ * (y >> FStar_UInt128_u32_32) + (t_ >> FStar_UInt128_u32_32) + + (u1 * (y >> FStar_UInt128_u32_32) + FStar_UInt128_u64_mod_32(t_) >> + FStar_UInt128_u32_32); + return lit; } inline static FStar_UInt128_uint128 diff --git a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_struct_endianness.h b/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h index 61fe85c49..61fe85c49 100644 --- 
a/lib/freebl/verified/kremlin/kremlib/dist/minimal/fstar_uint128_struct_endianness.h +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/fstar_uint128_struct_endianness.h diff --git a/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def b/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def new file mode 100644 index 000000000..c4ab8e38e --- /dev/null +++ b/lib/freebl/verified/karamel/krmllib/dist/minimal/libkrmllib.def @@ -0,0 +1,11 @@ +LIBRARY libkrmllib + +EXPORTS + FStar_UInt64_eq_mask + FStar_UInt64_gte_mask + FStar_UInt32_eq_mask + FStar_UInt32_gte_mask + FStar_UInt16_eq_mask + FStar_UInt16_gte_mask + FStar_UInt8_eq_mask + FStar_UInt8_gte_mask diff --git a/lib/freebl/verified/kremlin/include/kremlin/internal/target.h b/lib/freebl/verified/kremlin/include/kremlin/internal/target.h deleted file mode 100644 index 0affdaa80..000000000 --- a/lib/freebl/verified/kremlin/include/kremlin/internal/target.h +++ /dev/null @@ -1,115 +0,0 @@ -/* Copyright (c) INRIA and Microsoft Corporation. All rights reserved. - Licensed under the Apache 2.0 License. */ - -#ifndef __KREMLIN_TARGET_H -#define __KREMLIN_TARGET_H - -#include <stdlib.h> -#include <stdio.h> -#include <stdbool.h> -#include <inttypes.h> -#include <limits.h> - -#include "kremlin/internal/callconv.h" - -/******************************************************************************/ -/* Macros that KreMLin will generate. */ -/******************************************************************************/ - -/* For "bare" targets that do not have a C stdlib, the user might want to use - * [-add-early-include '"mydefinitions.h"'] and override these. */ -#ifndef KRML_HOST_PRINTF -#define KRML_HOST_PRINTF printf -#endif - -#if ( \ - (defined __STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && \ - (!(defined KRML_HOST_EPRINTF))) -#define KRML_HOST_EPRINTF(...) fprintf(stderr, __VA_ARGS__) -#elif !(defined KRML_HOST_EPRINTF) && defined(_MSC_VER) -#define KRML_HOST_EPRINTF(...) 
fprintf(stderr, __VA_ARGS__) -#endif - -#ifndef KRML_HOST_EXIT -#define KRML_HOST_EXIT exit -#endif - -#ifndef KRML_HOST_MALLOC -#define KRML_HOST_MALLOC malloc -#endif - -#ifndef KRML_HOST_CALLOC -#define KRML_HOST_CALLOC calloc -#endif - -#ifndef KRML_HOST_FREE -#define KRML_HOST_FREE free -#endif - -#ifndef KRML_HOST_TIME - -#include <time.h> - -/* Prims_nat not yet in scope */ -inline static int32_t -krml_time() -{ - return (int32_t)time(NULL); -} - -#define KRML_HOST_TIME krml_time -#endif - -/* In statement position, exiting is easy. */ -#define KRML_EXIT \ - do { \ - KRML_HOST_PRINTF("Unimplemented function at %s:%d\n", __FILE__, __LINE__); \ - KRML_HOST_EXIT(254); \ - } while (0) - -/* In expression position, use the comma-operator and a malloc to return an - * expression of the right size. KreMLin passes t as the parameter to the macro. - */ -#define KRML_EABORT(t, msg) \ - (KRML_HOST_PRINTF("KreMLin abort at %s:%d\n%s\n", __FILE__, __LINE__, msg), \ - KRML_HOST_EXIT(255), *((t *)KRML_HOST_MALLOC(sizeof(t)))) - -/* In FStar.Buffer.fst, the size of arrays is uint32_t, but it's a number of - * *elements*. Do an ugly, run-time check (some of which KreMLin can eliminate). 
- */ - -#ifdef __GNUC__ -#define _KRML_CHECK_SIZE_PRAGMA \ - _Pragma("GCC diagnostic ignored \"-Wtype-limits\"") -#else -#define _KRML_CHECK_SIZE_PRAGMA -#endif - -#define KRML_CHECK_SIZE(size_elt, sz) \ - do { \ - _KRML_CHECK_SIZE_PRAGMA \ - if (((size_t)(sz)) > ((size_t)(SIZE_MAX / (size_elt)))) { \ - KRML_HOST_PRINTF( \ - "Maximum allocatable size exceeded, aborting before overflow at " \ - "%s:%d\n", \ - __FILE__, __LINE__); \ - KRML_HOST_EXIT(253); \ - } \ - } while (0) - -#if defined(_MSC_VER) && _MSC_VER < 1900 -#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) _snprintf_s(buf, sz, _TRUNCATE, fmt, arg) -#else -#define KRML_HOST_SNPRINTF(buf, sz, fmt, arg) snprintf(buf, sz, fmt, arg) -#endif - -#if defined(__GNUC__) && __GNUC__ >= 4 && __GNUC_MINOR__ > 4 -#define KRML_DEPRECATED(x) __attribute__((deprecated(x))) -#elif defined(__GNUC__) -/* deprecated attribute is not defined in GCC < 4.5. */ -#define KRML_DEPRECATED(x) -#elif defined(_MSC_VER) -#define KRML_DEPRECATED(x) __declspec(deprecated(x)) -#endif - -#endif diff --git a/lib/freebl/verified/libintvector.h b/lib/freebl/verified/libintvector.h index 7f6714b02..fab6a35d1 100644 --- a/lib/freebl/verified/libintvector.h +++ b/lib/freebl/verified/libintvector.h @@ -3,28 +3,25 @@ #include <sys/types.h> -// # DEBUGGING FLAGS -// ================= -// It is possible to debug the trace of the primitives defined in -// this file by using the [DEBUG_VECTOR_TRACE] C flag. -// As we use the same vector types to manipulate blocks of uint32 and blocks -// of uint64, the log results will vary with the endianess, in particular for -// some generic operations like [and] or [xor]. By default, the printing is -// performed as if we were manipulating blocks of uint32. If you want to -// switch to blocks of uint64, use the flag: [DEBUG_VECTOR_TRACE_ELEMENTS_64]. -// Note that if those flags are activated, it may be necessary to tweak a bit -// the compilation options to build HACL. 
More specifically, you may need to -// always activate the compiler options to use vector support (even for files -// which actually don't make use of vectors, if they have libintvector.h as -// a dependency). When comparing traces, note that some instructions are not -// compiled in the same order on the different platforms, but it doesn't lead -// to a lot of discrepancies in practice. +/* We include config.h here to ensure that the various feature-flags are + * properly brought into scope. Users can either run the configure script, or + * write a config.h themselves and put it under version control. */ +#if defined(__has_include) +#if __has_include("config.h") +#include "config.h" +#endif +#endif + +/* # DEBUGGING: + * ============ + * It is possible to debug the current definitions by using libintvector_debug.h + * See the include at the bottom of the file. */ #define Lib_IntVector_Intrinsics_bit_mask64(x) -((x)&1) #if defined(__x86_64__) || defined(_M_X64) -// The following functions are only available on machines that support Intel AVX +#if defined(HACL_CAN_COMPILE_VEC128) #include <emmintrin.h> #include <tmmintrin.h> @@ -215,7 +212,9 @@ typedef __m128i Lib_IntVector_Intrinsics_vec128; #define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ (_mm_unpackhi_epi64(x1, x2)) -// The following functions are only available on machines that support Intel AVX2 +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#if defined(HACL_CAN_COMPILE_VEC256) #include <immintrin.h> #include <wmmintrin.h> @@ -423,7 +422,12 @@ typedef __m256i Lib_IntVector_Intrinsics_vec256; #define Lib_IntVector_Intrinsics_vec256_interleave_high128(x1, x2) \ (_mm256_permute2x128_si256(x1, x2, 0x31)) +#endif /* HACL_CAN_COMPILE_VEC256 */ + #elif (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && !defined(__ARM_32BIT_STATE) + +#if defined(HACL_CAN_COMPILE_VEC128) + #include <arm_neon.h> typedef uint32x4_t Lib_IntVector_Intrinsics_vec128; @@ -617,15 +621,20 @@ 
Lib_IntVector_Intrinsics_vec128_load32s(uint32_t x1, uint32_t x2, uint32_t x3, u #define Lib_IntVector_Intrinsics_vec128_interleave_high64(x1, x2) \ (vreinterpretq_u32_u64(vzip2q_u64(vreinterpretq_u64_u32(x1), vreinterpretq_u64_u32(x2)))) -// IBM z architecture -#elif defined(__s390x__) // this flag is for GCC only +#endif /* HACL_CAN_COMPILE_VEC128 */ + +/* IBM z architecture */ +#elif defined(__s390x__) /* this flag is for GCC only */ +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <stdint.h> #include <vecintrin.h> -// The main vector 128 type -// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, -// unsigned int, unsigned long long: the compiler complains that the parameter -// combination is invalid. +/* The main vector 128 type + * We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, + * unsigned int, unsigned long long: the compiler complains that the parameter + * combination is invalid. */ typedef unsigned char vector128_8 __attribute__((vector_size(16))); typedef unsigned int vector128_32 __attribute__((vector_size(16))); typedef unsigned long long vector128_64 __attribute__((vector_size(16))); @@ -633,33 +642,32 @@ typedef unsigned long long vector128_64 __attribute__((vector_size(16))); typedef vector128_8 Lib_IntVector_Intrinsics_vec128; typedef vector128_8 vector128; -// Small helper to change the endianess of the vector's elements, seen as uint32. -// Note that we can't use vec_revb. -#define Lib_IntVector_Intrinsics_vec128_load_store_switch_endian32(x0) \ - ((vector128)(vec_perm((vector128_8)(x0), (vector128_8){}, \ - (vector128_8){ 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 }))) +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + (vector128)((vector128_32)vec_revb(*((vector128_32*)(const uint8_t*)(x)))) -// Small helper to change the endianess of the vector's elements, seen as uint64 -// Note that we can't use vec_revb. 
-#define Lib_IntVector_Intrinsics_vec128_load_store_switch_endian64(x0) \ - ((vector128)(vec_perm((vector128_8)(x0), (vector128_8){}, \ - (vector128_8){ 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 }))) +#define Lib_IntVector_Intrinsics_vec128_load32_be(x) \ + (vector128)(*((vector128_32*)(const uint8_t*)(x))) -#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ - ((vector128)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian32( \ - ((vector128_8)vec_load_len((const uint8_t*)(x), 16)))) +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + (vector128)((vector128_64)vec_revb(*((vector128_64*)(const uint8_t*)(x)))) -#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ - ((vector128)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian64( \ - ((vector128_8)vec_load_len((const uint8_t*)(x), 16)))) +static inline void +Lib_IntVector_Intrinsics_vec128_store32_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = vec_revb((vector128_32)x1); +} -#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ - (vec_store_len(((vector128_8)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian32(x1)), \ - ((uint8_t*)(x0)), (uint32_t)16)) +static inline void +Lib_IntVector_Intrinsics_vec128_store32_be(const uint8_t *x0, vector128 x1) +{ + *((vector128_32 *)x0) = (vector128_32)x1; +} -#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ - (vec_store_len(((vector128_8)Lib_IntVector_Intrinsics_vec128_load_store_switch_endian64(x1)), \ - ((uint8_t*)(x0)), (uint32_t)16)) +static inline void +Lib_IntVector_Intrinsics_vec128_store64_le(const uint8_t *x0, vector128 x1) +{ + *((vector128_64 *)x0) = vec_revb((vector128_64)x1); +} #define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) @@ -719,15 +727,9 @@ typedef vector128_8 vector128; #define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) -// We need 
to permute the low and high components of the uint64 -// before calling vec_mule. The following helper does that. -#define Lib_IntVector_Intrinsics_vec128_mul64_perm_low_high_(x0) \ - ((vector128)(vec_perm((vector128_8)(x0), (vector128_8){}, \ - (vector128_8){ 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 }))) - -#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ - ((vector128)(vec_mule((vector128_32)Lib_IntVector_Intrinsics_vec128_mul64_perm_low_high_(x0), \ - (vector128_32)Lib_IntVector_Intrinsics_vec128_mul64_perm_low_high_(x1)))) +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mulo((vector128_32)(x0), \ + (vector128_32)(x1)))) #define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) @@ -739,7 +741,7 @@ typedef vector128_8 vector128; (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) #define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ - ((vector128)(vec_perm((vector128)(x0), (vector128){}, (vector128_8){ (x1 % 4) * 4 + 0, (x1 % 4) * 4 + 1, (x1 % 4) * 4 + 2, (x1 % 4) * 4 + 3, ((x1 + 1) % 4) * 4 + 0, ((x1 + 1) % 4) * 4 + 1, ((x1 + 1) % 4) * 4 + 2, ((x1 + 1) % 4) * 4 + 3, ((x1 + 2) % 4) * 4 + 0, ((x1 + 2) % 4) * 4 + 1, ((x1 + 2) % 4) * 4 + 2, ((x1 + 2) % 4) * 4 + 3, ((x1 + 3) % 4) * 4 + 0, ((x1 + 3) % 4) * 4 + 1, ((x1 + 3) % 4) * 4 + 2, ((x1 + 3) % 4) * 4 + 3 }))) + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), (x1 % 4) * 4))) #define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(x1)))) & \ @@ -749,6 +751,140 @@ typedef vector128_8 vector128; (((vector128)((vector128_64)vec_rli((vector128_64)(x0), (unsigned long)(64 - (x1))))) & \ ((vector128)((vector128_64){ 0xffffffffffffffff >> (x1), 0xffffffffffffffff >> (x1) }))) +#define Lib_IntVector_Intrinsics_vec128_shift_right32(x0, x1) \ + (((vector128)((vector128_32)vec_rli((vector128_32)(x0), 
(unsigned int)(32 - (x1))))) & \ + ((vector128)((vector128_32){ 0xffffffff >> (x1), 0xffffffff >> (x1), \ + 0xffffffff >> (x1), 0xffffffff >> (x1) }))) + +/* Doesn't work with vec_splat_u64 */ +#define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ + ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) + +#define Lib_IntVector_Intrinsics_vec128_sub64(x0, x1) \ + ((vector128)((vector128_64)(x0) - (vector128_64)(x1))) + +static inline vector128 +Lib_IntVector_Intrinsics_vec128_xor(vector128 x0, vector128 x1) +{ + return ((vector128)(vec_xor((vector128)(x0), (vector128)(x1)))); +} + +#define Lib_IntVector_Intrinsics_vec128_zero \ + ((vector128){}) + +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#elif defined(__powerpc64__) // PowerPC 64 - this flag is for GCC only + +#if defined(HACL_CAN_COMPILE_VEC128) + +#include <altivec.h> +#include <string.h> // for memcpy +#include <stdint.h> + +// The main vector 128 type +// We can't use uint8_t, uint32_t, uint64_t... instead of unsigned char, +// unsigned int, unsigned long long: the compiler complains that the parameter +// combination is invalid. 
+typedef vector unsigned char vector128_8; +typedef vector unsigned int vector128_32; +typedef vector unsigned long long vector128_64; + +typedef vector128_8 Lib_IntVector_Intrinsics_vec128; +typedef vector128_8 vector128; + +#define Lib_IntVector_Intrinsics_vec128_load32_le(x) \ + ((vector128)((vector128_32)(vec_xl(0, (const unsigned int*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_load64_le(x) \ + ((vector128)((vector128_64)(vec_xl(0, (const unsigned long long*)((const uint8_t*)(x)))))) + +#define Lib_IntVector_Intrinsics_vec128_store32_le(x0, x1) \ + (vec_xst((vector128_32)(x1), 0, (unsigned int*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_store64_le(x0, x1) \ + (vec_xst((vector128_64)(x1), 0, (unsigned long long*)((uint8_t*)(x0)))) + +#define Lib_IntVector_Intrinsics_vec128_add32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) + ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_add64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) + ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_and(x0, x1) \ + ((vector128)(vec_and((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_eq32(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_32)(x0)), ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_eq64(x0, x1) \ + ((vector128)(vec_cmpeq(((vector128_64)(x0)), ((vector128_64)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_extract32(x0, x1) \ + ((unsigned int)(vec_extract((vector128_32)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_extract64(x0, x1) \ + ((unsigned long long)(vec_extract((vector128_64)(x0), x1))) + +#define Lib_IntVector_Intrinsics_vec128_gt32(x0, x1) \ + ((vector128)((vector128_32)(((vector128_32)(x0)) > ((vector128_32)(x1))))) + +#define Lib_IntVector_Intrinsics_vec128_gt64(x0, x1) \ + ((vector128)((vector128_64)(((vector128_64)(x0)) > ((vector128_64)(x1))))) + +#define 
Lib_IntVector_Intrinsics_vec128_insert32(x0, x1, x2) \ + ((vector128)((vector128_32)vec_insert((unsigned int)(x1), (vector128_32)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_insert64(x0, x1, x2) \ + ((vector128)((vector128_64)vec_insert((unsigned long long)(x1), (vector128_64)(x0), x2))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high32(x0, x1) \ + ((vector128)((vector128_32)vec_mergel((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_high64(x0, x1) \ + ((vector128)((vector128_64)vec_mergel((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low32(x0, x1) \ + ((vector128)((vector128_32)vec_mergeh((vector128_32)(x0), (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_interleave_low64(x0, x1) \ + ((vector128)((vector128_64)vec_mergeh((vector128_64)(x0), (vector128_64)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_load32(x) \ + ((vector128)((vector128_32){ (unsigned int)(x), (unsigned int)(x), \ + (unsigned int)(x), (unsigned int)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_load32s(x0, x1, x2, x3) \ + ((vector128)((vector128_32){ (unsigned int)(x0), (unsigned int)(x1), (unsigned int)(x2), (unsigned int)(x3) })) + +#define Lib_IntVector_Intrinsics_vec128_load64(x) \ + ((vector128)((vector128_64){ (unsigned long long)(x), (unsigned long long)(x) })) + +#define Lib_IntVector_Intrinsics_vec128_lognot(x0) \ + ((vector128)(vec_xor((vector128)(x0), (vector128)vec_splat_u32(-1)))) + +#define Lib_IntVector_Intrinsics_vec128_mul64(x0, x1) \ + ((vector128)(vec_mule((vector128_32)(x0), \ + (vector128_32)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_or(x0, x1) \ + ((vector128)(vec_or((vector128)(x0), (vector128)(x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, x1) \ + ((vector128)(vec_rl((vector128_32)(x0), (vector128_32){ (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1), (unsigned int)(x1) }))) + +#define 
Lib_IntVector_Intrinsics_vec128_rotate_right32(x0, x1) \ + (Lib_IntVector_Intrinsics_vec128_rotate_left32(x0, (uint32_t)(32 - (x1)))) + +#define Lib_IntVector_Intrinsics_vec128_rotate_right_lanes32(x0, x1) \ + ((vector128)(vec_sld((vector128)(x0), (vector128)(x0), ((4 - (x1)) % 4) * 4))) + +#define Lib_IntVector_Intrinsics_vec128_shift_left64(x0, x1) \ + ((vector128)((vector128_64)vec_sl((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + +#define Lib_IntVector_Intrinsics_vec128_shift_right64(x0, x1) \ + ((vector128)((vector128_64)vec_sr((vector128_64)(x0), (vector128_64){ (unsigned long)(x1), (unsigned long)(x1) }))) + // Doesn't work with vec_splat_u64 #define Lib_IntVector_Intrinsics_vec128_smul64(x0, x1) \ ((vector128)(Lib_IntVector_Intrinsics_vec128_mul64(x0, ((vector128_64){ (unsigned long long)(x1), (unsigned long long)(x1) })))) @@ -762,6 +898,18 @@ typedef vector128_8 vector128; #define Lib_IntVector_Intrinsics_vec128_zero \ ((vector128){}) -#endif // IBM z architecture +#endif /* HACL_CAN_COMPILE_VEC128 */ + +#endif // PowerPC64 +// DEBUGGING: +// If libintvector_debug.h exists, use it to debug the current implementations. +// Note that some flags must be enabled for the debugging to be effective: +// see libintvector_debug.h for more details. +#if defined(__has_include) +#if __has_include("libintvector_debug.h") +#include "libintvector_debug.h" #endif +#endif + +#endif // __Vec_Intrin_H |