diff options
author | Kevin Jacobs <kjacobs@mozilla.com> | 2020-03-03 15:46:55 -0800 |
---|---|---|
committer | Kevin Jacobs <kjacobs@mozilla.com> | 2020-03-03 15:46:55 -0800 |
commit | 20ac22ba66bba2acfee5d6bf104389698e674a95 (patch) | |
tree | 1eb56031c973b1fa0c4b70819e0019e1c15bc4cd | |
parent | ea9715955f1ef3f1484f65d11c5dc9b14af2ead3 (diff) | |
download | nss-hg-20ac22ba66bba2acfee5d6bf104389698e674a95.tar.gz |
Backed out changeset d5deac55f543
-rw-r--r-- | automation/taskcluster/graph/src/extend.js | 6 | ||||
-rw-r--r-- | coreconf/arch.mk | 29 | ||||
-rw-r--r-- | coreconf/config.mk | 4 | ||||
-rw-r--r-- | lib/freebl/Makefile | 80 | ||||
-rw-r--r-- | lib/freebl/blapii.h | 1 | ||||
-rw-r--r-- | lib/freebl/blinit.c | 21 | ||||
-rw-r--r-- | lib/freebl/chacha20poly1305.c | 59 | ||||
-rw-r--r-- | lib/freebl/freebl.gyp | 83 | ||||
-rw-r--r-- | lib/freebl/verified/Hacl_Chacha20Poly1305_256.c | 1176 | ||||
-rw-r--r-- | lib/freebl/verified/Hacl_Chacha20Poly1305_256.h | 60 | ||||
-rw-r--r-- | lib/freebl/verified/Hacl_Chacha20_Vec256.c | 876 | ||||
-rw-r--r-- | lib/freebl/verified/Hacl_Chacha20_Vec256.h | 55 | ||||
-rw-r--r-- | lib/freebl/verified/Hacl_Poly1305_256.c | 2120 | ||||
-rw-r--r-- | lib/freebl/verified/Hacl_Poly1305_256.h | 66 | ||||
-rw-r--r-- | nss-tool/hw-support.c | 1 |
15 files changed, 44 insertions, 4593 deletions
diff --git a/automation/taskcluster/graph/src/extend.js b/automation/taskcluster/graph/src/extend.js index 122340ed3..2440f03f8 100644 --- a/automation/taskcluster/graph/src/extend.js +++ b/automation/taskcluster/graph/src/extend.js @@ -101,7 +101,7 @@ queue.filter(task => { // Don't run all additional hardware tests on ARM. if (task.group == "Cipher" && task.platform == "aarch64" && task.env && (task.env.NSS_DISABLE_PCLMUL == "1" || task.env.NSS_DISABLE_HW_AES == "1" - || task.env.NSS_DISABLE_AVX == "1" || task.env.NSS_DISABLE_AVX2 == "1")) { + || task.env.NSS_DISABLE_AVX == "1")) { return false; } @@ -1015,10 +1015,6 @@ function scheduleTests(task_build, task_cert, test_base) { env: {NSS_DISABLE_AVX: "1"}, group: "Cipher" })); queue.scheduleTask(merge(cert_base_long, { - name: "Cipher tests", symbol: "NoAVX2", tests: "cipher", - env: {NSS_DISABLE_AVX2: "1"}, group: "Cipher" - })); - queue.scheduleTask(merge(cert_base_long, { name: "Cipher tests", symbol: "NoSSSE3|NEON", tests: "cipher", env: { NSS_DISABLE_ARM_NEON: "1", diff --git a/coreconf/arch.mk b/coreconf/arch.mk index 790372d34..79e56d510 100644 --- a/coreconf/arch.mk +++ b/coreconf/arch.mk @@ -140,35 +140,6 @@ ifeq ($(OS_ARCH),OS_2) endif ####################################################################### -# Master "Core Components" macros for Hardware features # -####################################################################### - -ifndef NSS_DISABLE_AVX2 - ifneq ($(CPU_ARCH),x86_64) - # Disable AVX2 entirely on non-Intel platforms - NSS_DISABLE_AVX2 = 1 - $(warning CPU_ARCH is not x86_64, disabling -mavx2) - else - ifdef CC_IS_CLANG - # Clang reports its version as an older gcc, but it's OK - NSS_DISABLE_AVX2 = 0 - else - ifneq (,$(filter 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION)))) - NSS_DISABLE_AVX2 = 0 - endif - ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) - NSS_DISABLE_AVX2 = 0 - endif - endif - ifndef NSS_DISABLE_AVX2 - $(warning Unable to find gcc 4.8 or greater, disabling -Werror) - NSS_DISABLE_AVX2 = 1 - endif - endif - export NSS_DISABLE_AVX2 -endif #ndef NSS_DISABLE_AVX2 - -####################################################################### # Master "Core Components" macros for getting the OS target # ####################################################################### diff --git a/coreconf/config.mk b/coreconf/config.mk index 4eda497a2..704e3fa83 100644 --- a/coreconf/config.mk +++ b/coreconf/config.mk @@ -162,10 +162,6 @@ ifdef NSS_DISABLE_DBM DEFINES += -DNSS_DISABLE_DBM endif -ifdef NSS_DISABLE_AVX2 -DEFINES += -DNSS_DISABLE_AVX2 -endif - ifdef NSS_DISABLE_CHACHAPOLY DEFINES += -DNSS_DISABLE_CHACHAPOLY endif diff --git a/lib/freebl/Makefile b/lib/freebl/Makefile index 344ed311f..ce9d36f3a 100644 --- a/lib/freebl/Makefile +++ b/lib/freebl/Makefile @@ -85,11 +85,11 @@ endif # FREEBL_PRELINK_COMMAND # # This is an optional environment variable which can override the default -# prelink command. It could be used on systems that did something similiar to -# prelink but used a different command and syntax. The only requirement is the -# program must take the library as the last argument, the program must output -# the original library to standard out, and the program does not need to take -# any quoted or imbedded spaces in its arguments (except the path to the +# prelink command. It could be used on systems that did something similiar to +# prelink but used a different command and syntax. The only requirement is the +# program must take the library as the last argument, the program must output +# the original library to standard out, and the program does not need to take +# any quoted or imbedded spaces in its arguments (except the path to the # library itself, which can have imbedded spaces or special characters). # ifdef FREEBL_USE_PRELINK @@ -148,7 +148,7 @@ endif ifeq (OS2,$(OS_TARGET)) ASFILES = mpi_x86_os2.s - DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE DEFINES += -DMP_ASSEMBLY_DIV_2DX1D DEFINES += -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD DEFINES += -DMP_IS_LITTLE_ENDIAN @@ -169,7 +169,7 @@ ifdef NS_USE_GCC else # MSVC MPI_SRCS += mpi_x86_asm.c - DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT -DMP_NO_MP_WORD ifdef BUILD_OPT OPTIMIZER += -Ox # maximum optimization for freebl @@ -220,7 +220,7 @@ ifeq ($(USE_N32),1) ifeq ($(NS_USE_GCC),1) ASFLAGS = -Wp,-P -Wp,-traditional -O -mips3 else - ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 + ASFLAGS = -O -OPT:Olimit=4000 -dollar -fullwarn -xansi -n32 -mips3 endif DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE DEFINES += -DMP_USE_UINT_DIGIT @@ -253,12 +253,12 @@ ifeq ($(CPU_ARCH),x86_64) endif ifeq ($(CPU_ARCH),x86) ASFILES = mpi_x86.s - DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE DEFINES += -DMP_ASSEMBLY_DIV_2DX1D -DMP_USE_UINT_DIGIT DEFINES += -DMP_IS_LITTLE_ENDIAN endif ifeq ($(CPU_ARCH),arm) - DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE DEFINES += -DMP_USE_UINT_DIGIT DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512 MPI_SRCS += mpi_arm.c @@ -283,7 +283,7 @@ ifneq ($(OS_TEST), ia64) # PA-RISC ASFILES += ret_cr16.s ifndef USE_64 - FREEBL_BUILD_SINGLE_SHLIB = + FREEBL_BUILD_SINGLE_SHLIB = HAVE_ABI32_INT32 = 1 HAVE_ABI32_FPU = 1 endif @@ -294,15 +294,15 @@ ifdef USE_ABI32_INT32 DEFINES += -DSHA_NO_LONG_LONG # avoid 64-bit arithmetic in SHA512 else ifdef USE_64 -# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits - MPI_SRCS += mpi_hp.c - ASFILES += hpma512.s hppa20.s +# this builds for DA2.0W (HP PA 2.0 Wide), the LP64 ABI, using 64-bit digits + MPI_SRCS += mpi_hp.c + ASFILES += hpma512.s hppa20.s DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE else -# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model +# this builds for DA2.0 (HP PA 2.0 Narrow) ABI32_FPU model # (the 32-bit ABI with 64-bit registers) using 64-bit digits - MPI_SRCS += mpi_hp.c - ASFILES += hpma512.s hppa20.s + MPI_SRCS += mpi_hp.c + ASFILES += hpma512.s hppa20.s DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE ifndef NS_USE_GCC ARCHFLAG = -Aa +e +DA2.0 +DS2.0 @@ -337,7 +337,7 @@ else endif # NS_USE_GCC # Sun's WorkShop defines v8, v8plus and v9 architectures. -# gcc on Solaris defines v8 and v9 "cpus". +# gcc on Solaris defines v8 and v9 "cpus". # gcc's v9 is equivalent to Workshop's v8plus. # gcc's -m64 is equivalent to Workshop's v9 # We always use Sun's assembler, which uses Sun's naming convention. @@ -387,7 +387,7 @@ ifeq ($(CPU_ARCH),sparc) FPU_TARGET_OPTIMIZER = -xchip=ultra2 endif ifdef USE_ABI32_INT64 - # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, # 32-bit ABI, it uses 64-bit words, integer arithmetic, # no FPU (non-VIS cpus). # These flags were suggested by the compiler group for building @@ -400,7 +400,7 @@ ifeq ($(CPU_ARCH),sparc) SOLARIS_AS_FLAGS = -xarch=v8plus -K PIC endif ifdef USE_ABI32_FPU - # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, # 32-bit ABI, it uses FPU code, and 32-bit word size. # these flags were determined by running cc -### -fast and copying # the generated flag settings @@ -442,12 +442,12 @@ ifeq ($(CPU_ARCH),sparc) ### set flags for both GCC and Sun cc ifdef USE_ABI32_INT64 - # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, # 32-bit ABI, it uses 64-bit words, integer arithmetic, no FPU # best times are with no MP_ flags specified endif ifdef USE_ABI32_FPU - # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, + # this builds for Sparc v8+a ABI32_FPU architecture, 64-bit registers, # 32-bit ABI, it uses FPU code, and 32-bit word size MPI_SRCS += mpi_sparc.c ASFILES = mpv_sparcv8.s montmulfv8.s @@ -503,7 +503,7 @@ else else # Solaris x86 DEFINES += -DMP_USE_UINT_DIGIT - DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE + DEFINES += -DMP_ASSEMBLY_MULTIPLY -DMP_ASSEMBLY_SQUARE DEFINES += -DMP_ASSEMBLY_DIV_2DX1D ASFILES = mpi_i86pc.s ifndef NS_USE_GCC @@ -526,12 +526,8 @@ ifneq ($(shell $(CC) -? 2>&1 >/dev/null </dev/null | sed -e 's/:.*//;1q'),lcc) HAVE_INT128_SUPPORT = 1 DEFINES += -DHAVE_INT128_SUPPORT endif - ifneq (,$(filter 4.8 4.9,$(word 1,$(GCC_VERSION)).$(word 2,$(GCC_VERSION)))) - NSS_DISABLE_AVX2 = 1 - endif ifeq (,$(filter 0 1 2 3 4,$(word 1,$(GCC_VERSION)))) HAVE_INT128_SUPPORT = 1 - NSS_DISABLE_AVX2 = 0 DEFINES += -DHAVE_INT128_SUPPORT endif endif @@ -544,11 +540,7 @@ endif ifndef NSS_DISABLE_CHACHAPOLY ifeq ($(CPU_ARCH),x86_64) - ifndef NSS_DISABLE_AVX2 - EXTRA_SRCS += Hacl_Poly1305_256.c Hacl_Chacha20_Vec256.c Hacl_Chacha20Poly1305_256.c - else - EXTRA_SRCS += Hacl_Poly1305_128.c Hacl_Chacha20_Vec128.c Hacl_Chacha20Poly1305_128.c - endif # NSS_DISABLE_AVX2 + EXTRA_SRCS += Hacl_Poly1305_128.c Hacl_Chacha20_Vec128.c Hacl_Chacha20Poly1305_128.c endif # x86_64 VERIFIED_SRCS += Hacl_Poly1305_32.c Hacl_Chacha20.c Hacl_Chacha20Poly1305_32.c @@ -638,7 +630,7 @@ ifdef FREEBL_BUILD_SINGLE_SHLIB ################### Single shared lib stuff ######################### SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB -ALL_TRASH += $(SINGLE_SHLIB_DIR) +ALL_TRASH += $(SINGLE_SHLIB_DIR) $(SINGLE_SHLIB_DIR): -mkdir -p $(SINGLE_SHLIB_DIR) @@ -652,7 +644,7 @@ endif ifdef NEED_STUB_BUILD SINGLE_SHLIB_DIR = $(OBJDIR)/$(OS_TARGET)_SINGLE_SHLIB -ALL_TRASH += $(SINGLE_SHLIB_DIR) +ALL_TRASH += $(SINGLE_SHLIB_DIR) $(SINGLE_SHLIB_DIR): -mkdir $(SINGLE_SHLIB_DIR) @@ -666,7 +658,7 @@ endif ######################## ABI32_FPU stuff ######################### ifdef HAVE_ABI32_FPU ABI32_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_FPU -ALL_TRASH += $(ABI32_FPU_DIR) +ALL_TRASH += $(ABI32_FPU_DIR) $(ABI32_FPU_DIR): -mkdir $(ABI32_FPU_DIR) @@ -679,7 +671,7 @@ endif ######################## ABI32_INT32 stuff ######################### ifdef HAVE_ABI32_INT32 ABI32_INT32_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT32 -ALL_TRASH += $(ABI32_INT32_DIR) +ALL_TRASH += $(ABI32_INT32_DIR) $(ABI32_INT32_DIR): -mkdir $(ABI32_INT32_DIR) @@ -692,7 +684,7 @@ endif ######################## ABI32_INT64 stuff ######################### ifdef HAVE_ABI32_INT64 ABI32_INT64_DIR = $(OBJDIR)/$(OS_TARGET)_ABI32_INT64 -ALL_TRASH += $(ABI32_INT64_DIR) +ALL_TRASH += $(ABI32_INT64_DIR) $(ABI32_INT64_DIR): -mkdir $(ABI32_INT64_DIR) @@ -709,7 +701,7 @@ endif ######################## ABI64_FPU stuff ######################### ifdef HAVE_ABI64_FPU ABI64_FPU_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_FPU -ALL_TRASH += $(ABI64_FPU_DIR) +ALL_TRASH += $(ABI64_FPU_DIR) $(ABI64_FPU_DIR): -mkdir $(ABI64_FPU_DIR) @@ -722,7 +714,7 @@ endif ######################## ABI64_INT stuff ######################### ifdef HAVE_ABI64_INT ABI64_INT_DIR = $(OBJDIR)/$(OS_TARGET)_ABI64_INT -ALL_TRASH += $(ABI64_INT_DIR) +ALL_TRASH += $(ABI64_INT_DIR) $(ABI64_INT_DIR): -mkdir $(ABI64_INT_DIR) @@ -793,12 +785,6 @@ $(OBJDIR)/$(PROG_PREFIX)rijndael$(OBJ_SUFFIX): CFLAGS += -mcrypto -maltivec -mvs endif endif -$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes -$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes +$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes +$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes $(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_128$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -maes -mpclmul - -ifndef NSS_DISABLE_AVX2 -$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx2 -maes -$(OBJDIR)/$(PROG_PREFIX)Hacl_Chacha20_Vec256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -mavx2 -maes -$(OBJDIR)/$(PROG_PREFIX)Hacl_Poly1305_256$(OBJ_SUFFIX): CFLAGS += -mssse3 -msse4 -mavx -mavx2 -maes -mpclmul -endif diff --git a/lib/freebl/blapii.h b/lib/freebl/blapii.h index 157523f78..d30c82b21 100644 --- a/lib/freebl/blapii.h +++ b/lib/freebl/blapii.h @@ -80,7 +80,6 @@ SECStatus generate_prime(mp_int *prime, int primeLen); PRBool aesni_support(); PRBool clmul_support(); PRBool avx_support(); -PRBool avx2_support(); PRBool ssse3_support(); PRBool sse4_1_support(); PRBool sse4_2_support(); diff --git a/lib/freebl/blinit.c b/lib/freebl/blinit.c index c9c0dda59..dff786451 100644 --- a/lib/freebl/blinit.c +++ b/lib/freebl/blinit.c @@ -27,7 +27,6 @@ static PRCallOnceType coFreeblInit; static PRBool aesni_support_ = PR_FALSE; static PRBool clmul_support_ = PR_FALSE; static PRBool avx_support_ = PR_FALSE; -static PRBool avx2_support_ = PR_FALSE; static PRBool ssse3_support_ = PR_FALSE; static PRBool sse4_1_support_ = PR_FALSE; static PRBool sse4_2_support_ = PR_FALSE; @@ -76,43 +75,28 @@ check_xcr0_ymm() #define ECX_XSAVE (1 << 26) #define ECX_OSXSAVE (1 << 27) #define ECX_AVX (1 << 28) -#define EBX_AVX2 (1 << 5) -#define EBX_BMI1 (1 << 3) -#define EBX_BMI2 (1 << 8) -#define ECX_FMA (1 << 12) -#define ECX_MOVBE (1 << 22) #define ECX_SSSE3 (1 << 9) #define ECX_SSE4_1 (1 << 19) #define ECX_SSE4_2 (1 << 20) #define AVX_BITS (ECX_XSAVE | ECX_OSXSAVE | ECX_AVX) -#define AVX2_EBX_BITS (EBX_AVX2 | EBX_BMI1 | EBX_BMI2) -#define AVX2_ECX_BITS (ECX_FMA | ECX_MOVBE) void CheckX86CPUSupport() { unsigned long eax, ebx, ecx, edx; - unsigned long eax7, ebx7, ecx7, edx7; char *disable_hw_aes = PR_GetEnvSecure("NSS_DISABLE_HW_AES"); char *disable_pclmul = PR_GetEnvSecure("NSS_DISABLE_PCLMUL"); char *disable_avx = PR_GetEnvSecure("NSS_DISABLE_AVX"); - char *disable_avx2 = PR_GetEnvSecure("NSS_DISABLE_AVX2"); char *disable_ssse3 = PR_GetEnvSecure("NSS_DISABLE_SSSE3"); char *disable_sse4_1 = PR_GetEnvSecure("NSS_DISABLE_SSE4_1"); char *disable_sse4_2 = PR_GetEnvSecure("NSS_DISABLE_SSE4_2"); freebl_cpuid(1, &eax, &ebx, &ecx, &edx); - freebl_cpuid(7, &eax7, &ebx7, &ecx7, &edx7); aesni_support_ = (PRBool)((ecx & ECX_AESNI) != 0 && disable_hw_aes == NULL); clmul_support_ = (PRBool)((ecx & ECX_CLMUL) != 0 && disable_pclmul == NULL); /* For AVX we check AVX, OSXSAVE, and XSAVE * as well as XMM and YMM state. */ avx_support_ = (PRBool)((ecx & AVX_BITS) == AVX_BITS) && check_xcr0_ymm() && disable_avx == NULL; - /* For AVX2 we check AVX2, BMI1, BMI2, FMA, MOVBE. - * We do not check for AVX above. */ - avx2_support_ = (PRBool)((ebx7 & AVX2_EBX_BITS) == AVX2_EBX_BITS && - (ecx & AVX2_ECX_BITS) == AVX2_ECX_BITS && - disable_avx2 == NULL); ssse3_support_ = (PRBool)((ecx & ECX_SSSE3) != 0 && disable_ssse3 == NULL); sse4_1_support_ = (PRBool)((ecx & ECX_SSE4_1) != 0 && @@ -400,11 +384,6 @@ avx_support() return avx_support_; } PRBool -avx2_support() -{ - return avx2_support_; -} -PRBool ssse3_support() { return ssse3_support_; diff --git a/lib/freebl/chacha20poly1305.c b/lib/freebl/chacha20poly1305.c index 1e123d601..4daba2acc 100644 --- a/lib/freebl/chacha20poly1305.c +++ b/lib/freebl/chacha20poly1305.c @@ -15,17 +15,9 @@ #include "blapii.h" #include "chacha20poly1305.h" -// There are three implementations of ChaCha20Poly1305: -// 1) 128-bit with AVX hardware acceleration used on x64 -// 2) 256-bit with AVX2 hardware acceleration used on x64 -// 3) 32-bit used on all other platforms - -// On x64 when AVX2 and other necessary registers are available, -// the 256bit-verctorized version will be used. When AVX2 features -// are unavailable or disabled but AVX registers are available, the -// 128bit-vectorized version will be used. In all other cases the -// scalar version of the HACL* code will be used. - +// There are two implementations of ChaCha20Poly1305: +// 1) 128-bit with hardware acceleration used on x64 +// 2) 32-bit used on all other platforms // Instead of including the headers (they bring other things we don't want), // we declare the functions here. // Usage is guarded by runtime checks of required hardware features. @@ -43,19 +35,6 @@ Hacl_Chacha20Poly1305_128_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, uint8_t *aad, uint32_t mlen, uint8_t *m, uint8_t *cipher, uint8_t *mac); -// Forward declaration from Hacl_Chacha20_Vec256.h and Hacl_Chacha20Poly1305_256.h. -extern void Hacl_Chacha20_Vec256_chacha20_encrypt_256(uint32_t len, uint8_t *out, - uint8_t *text, uint8_t *key, - uint8_t *n1, uint32_t ctr); -extern void -Hacl_Chacha20Poly1305_256_aead_encrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, - uint8_t *aad, uint32_t mlen, uint8_t *m, - uint8_t *cipher, uint8_t *mac); -extern uint32_t -Hacl_Chacha20Poly1305_256_aead_decrypt(uint8_t *k, uint8_t *n1, uint32_t aadlen, - uint8_t *aad, uint32_t mlen, uint8_t *m, - uint8_t *cipher, uint8_t *mac); - // Forward declaration from Hacl_Chacha20.h and Hacl_Chacha20Poly1305_32.h. extern void Hacl_Chacha20_chacha20_encrypt(uint32_t len, uint8_t *out, uint8_t *text, uint8_t *key, @@ -134,15 +113,7 @@ ChaCha20Xor(uint8_t *output, uint8_t *block, uint32_t len, uint8_t *k, { #ifdef NSS_X64 if (ssse3_support() && sse4_1_support() && avx_support()) { -#ifdef NSS_DISABLE_AVX2 Hacl_Chacha20_Vec128_chacha20_encrypt_128(len, output, block, k, nonce, ctr); -#else - if (avx2_support()) { - Hacl_Chacha20_Vec256_chacha20_encrypt_256(len, output, block, k, nonce, ctr); - } else { - Hacl_Chacha20_Vec128_chacha20_encrypt_128(len, output, block, k, nonce, ctr); - } -#endif } else #endif { @@ -196,21 +167,9 @@ ChaCha20Poly1305_Seal(const ChaCha20Poly1305Context *ctx, unsigned char *output, #ifdef NSS_X64 if (ssse3_support() && sse4_1_support() && avx_support()) { -#ifdef NSS_DISABLE_AVX2 Hacl_Chacha20Poly1305_128_aead_encrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, (uint8_t *)input, output, output + inputLen); -#else - if (avx2_support()) { - Hacl_Chacha20Poly1305_256_aead_encrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, - (uint8_t *)input, output, output + inputLen); - } else { - Hacl_Chacha20Poly1305_128_aead_encrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, inputLen, - (uint8_t *)input, output, output + inputLen); - } -#endif } else #endif { @@ -258,21 +217,9 @@ ChaCha20Poly1305_Open(const ChaCha20Poly1305Context *ctx, unsigned char *output, uint32_t res = 1; #ifdef NSS_X64 if (ssse3_support() && sse4_1_support() && avx_support()) { -#ifdef NSS_DISABLE_AVX2 res = Hacl_Chacha20Poly1305_128_aead_decrypt( (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); -#else - if (avx2_support()) { - res = Hacl_Chacha20Poly1305_256_aead_decrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, - (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); - } else { - res = Hacl_Chacha20Poly1305_128_aead_decrypt( - (uint8_t *)ctx->key, (uint8_t *)nonce, adLen, (uint8_t *)ad, ciphertextLen, - (uint8_t *)output, (uint8_t *)input, (uint8_t *)input + ciphertextLen); - } -#endif } else #endif { diff --git a/lib/freebl/freebl.gyp b/lib/freebl/freebl.gyp index 48230e9cf..d46bd1949 100644 --- a/lib/freebl/freebl.gyp +++ b/lib/freebl/freebl.gyp @@ -54,10 +54,11 @@ ], }, { - 'target_name': 'hw-acc-crypto-avx', + # TODO: make this so that all hardware accelerated code is in here. + 'target_name': 'hw-acc-crypto', 'type': 'static_library', # 'sources': [ - # All AVX hardware accelerated crypto currently requires x64 + # All hardware accelerated crypto currently requires x64 # ], 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports' @@ -117,72 +118,6 @@ ], }, { - 'target_name': 'hw-acc-crypto-avx2', - 'type': 'static_library', - # 'sources': [ - # All AVX2 hardware accelerated crypto currently requires x64 - # ], - 'dependencies': [ - '<(DEPTH)/exports.gyp:nss_exports' - ], - 'conditions': [ - [ 'target_arch=="x64"', { - 'cflags': [ - '-mssse3', - '-msse4' - ], - 'cflags_mozilla': [ - '-mssse3', - '-msse4', - '-mpclmul', - '-maes', - '-mavx', - '-mavx2', - ], - # GCC doesn't define this. - 'defines': [ - '__SSSE3__', - ], - }], - [ 'OS=="linux" or OS=="android" or OS=="dragonfly" or OS=="freebsd" or \ - OS=="netbsd" or OS=="openbsd"', { - 'cflags': [ - '-mpclmul', - '-maes', - '-mavx', - '-mavx2', - ], - }], - # macOS build doesn't use cflags. - [ 'OS=="mac" or OS=="ios"', { - 'xcode_settings': { - 'OTHER_CFLAGS': [ - '-mssse3', - '-msse4', - '-mpclmul', - '-maes', - '-mavx', - '-mavx2', - ], - }, - }], - [ 'target_arch=="arm"', { - # Gecko doesn't support non-NEON platform on Android, but tier-3 - # platform such as Linux/arm will need it - 'cflags_mozilla': [ - '-mfpu=neon' - ], - }], - [ 'target_arch=="x64"', { - 'sources': [ - 'verified/Hacl_Poly1305_256.c', - 'verified/Hacl_Chacha20_Vec256.c', - 'verified/Hacl_Chacha20Poly1305_256.c', - ], - }], - ], - }, - { 'target_name': 'gcm-aes-x86_c_lib', 'type': 'static_library', 'sources': [ @@ -318,8 +253,7 @@ ], 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports', - 'hw-acc-crypto-avx', - 'hw-acc-crypto-avx2', + 'hw-acc-crypto', ], 'conditions': [ [ 'target_arch=="ia32" or target_arch=="x64"', { @@ -380,8 +314,7 @@ ], 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports', - 'hw-acc-crypto-avx', - 'hw-acc-crypto-avx2', + 'hw-acc-crypto', ], 'conditions': [ [ 'target_arch=="ia32" or target_arch=="x64"', { @@ -461,8 +394,7 @@ 'type': 'shared_library', 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports', - 'hw-acc-crypto-avx', - 'hw-acc-crypto-avx2', + 'hw-acc-crypto', ], }, { @@ -478,8 +410,7 @@ ], 'dependencies': [ '<(DEPTH)/exports.gyp:nss_exports', - 'hw-acc-crypto-avx', - 'hw-acc-crypto-avx2', + 'hw-acc-crypto', ], 'asflags_mozilla': [ '-mcpu=v9', '-Wa,-xarch=v9a' diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c deleted file mode 100644 index dd774816c..000000000 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.c +++ /dev/null @@ -1,1176 +0,0 @@ -/* MIT License - * - * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "Hacl_Chacha20Poly1305_256.h" - -static inline void -poly1305_padded_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint32_t len, uint8_t *text) -{ - uint32_t n1 = len / (uint32_t)16U; - uint32_t r = len % (uint32_t)16U; - uint8_t *blocks = text; - uint8_t *rem1 = text + n1 * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 *pre0 = ctx + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 *acc0 = ctx; - uint32_t sz_block = (uint32_t)64U; - uint32_t len0 = n1 * (uint32_t)16U / sz_block * sz_block; - uint8_t *t00 = blocks; - if (len0 > (uint32_t)0U) { - uint32_t bs = (uint32_t)64U; - uint8_t *text0 = t00; - Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc0, text0); - uint32_t len1 = len0 - bs; - uint8_t *text1 = t00 + bs; - uint32_t nb = len1 / bs; - for (uint32_t i = (uint32_t)0U; i < nb; i++) { - uint8_t *block = text1 + i * bs; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(block); - Lib_IntVector_Intrinsics_vec256 - hi = Lib_IntVector_Intrinsics_vec256_load_le(block + (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 - mask2610 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); - Lib_IntVector_Intrinsics_vec256 - m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); - Lib_IntVector_Intrinsics_vec256 - m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); - Lib_IntVector_Intrinsics_vec256 - m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); - Lib_IntVector_Intrinsics_vec256 - m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); - Lib_IntVector_Intrinsics_vec256 - t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); - Lib_IntVector_Intrinsics_vec256 - t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); - Lib_IntVector_Intrinsics_vec256 - t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); - Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask2610); - Lib_IntVector_Intrinsics_vec256 - t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask2610); - Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask2610); - Lib_IntVector_Intrinsics_vec256 - t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); - Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask2610); - Lib_IntVector_Intrinsics_vec256 - o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 o00 = o5; - Lib_IntVector_Intrinsics_vec256 o11 = o10; - Lib_IntVector_Intrinsics_vec256 o21 = o20; - Lib_IntVector_Intrinsics_vec256 o31 = o30; - Lib_IntVector_Intrinsics_vec256 o41 = o40; - e[0U] = o00; - e[1U] = o11; - e[2U] = o21; - e[3U] = o31; - e[4U] = o41; - uint64_t b = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); - Lib_IntVector_Intrinsics_vec256 *rn = pre0 + (uint32_t)10U; - Lib_IntVector_Intrinsics_vec256 *rn5 = pre0 + (uint32_t)15U; - Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; - Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = acc0[0U]; - Lib_IntVector_Intrinsics_vec256 f110 = acc0[1U]; - Lib_IntVector_Intrinsics_vec256 f120 = acc0[2U]; - Lib_IntVector_Intrinsics_vec256 f130 = acc0[3U]; - Lib_IntVector_Intrinsics_vec256 f140 = acc0[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); - Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); - Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); - Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); - Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); - Lib_IntVector_Intrinsics_vec256 - a01 = - Lib_IntVector_Intrinsics_vec256_add64(a0, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); - Lib_IntVector_Intrinsics_vec256 - a11 = - Lib_IntVector_Intrinsics_vec256_add64(a1, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); - Lib_IntVector_Intrinsics_vec256 - a21 = - Lib_IntVector_Intrinsics_vec256_add64(a2, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); - Lib_IntVector_Intrinsics_vec256 - a31 = - Lib_IntVector_Intrinsics_vec256_add64(a3, - Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); - Lib_IntVector_Intrinsics_vec256 - a41 = - Lib_IntVector_Intrinsics_vec256_add64(a4, - Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); - Lib_IntVector_Intrinsics_vec256 - a02 = - Lib_IntVector_Intrinsics_vec256_add64(a01, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); - Lib_IntVector_Intrinsics_vec256 - a12 = - Lib_IntVector_Intrinsics_vec256_add64(a11, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); - Lib_IntVector_Intrinsics_vec256 - a22 = - Lib_IntVector_Intrinsics_vec256_add64(a21, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); - Lib_IntVector_Intrinsics_vec256 - a32 = - Lib_IntVector_Intrinsics_vec256_add64(a31, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); - Lib_IntVector_Intrinsics_vec256 - a42 = - Lib_IntVector_Intrinsics_vec256_add64(a41, - Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r52, f140)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); - Lib_IntVector_Intrinsics_vec256 t01 = a04; - Lib_IntVector_Intrinsics_vec256 t1 = a14; - Lib_IntVector_Intrinsics_vec256 t2 = a24; - Lib_IntVector_Intrinsics_vec256 t3 = a34; - Lib_IntVector_Intrinsics_vec256 t4 = a44; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o01 = x02; - Lib_IntVector_Intrinsics_vec256 o12 = x12; - Lib_IntVector_Intrinsics_vec256 o22 = x21; - Lib_IntVector_Intrinsics_vec256 o32 = x32; - Lib_IntVector_Intrinsics_vec256 o42 = x42; - acc0[0U] = o01; - acc0[1U] = o12; - acc0[2U] = o22; - acc0[3U] = o32; - acc0[4U] = o42; - Lib_IntVector_Intrinsics_vec256 f100 = acc0[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = acc0[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = acc0[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = acc0[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = acc0[4U]; - Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; - Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); - Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); - Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22); - Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); - Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); - acc0[0U] = o0; - acc0[1U] = o1; - acc0[2U] = o2; - acc0[3U] = o3; - acc0[4U] = o4; - } - Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc0, pre0); - } - uint32_t len1 = n1 * (uint32_t)16U - len0; - uint8_t *t10 = blocks + len0; - uint32_t nb = len1 / (uint32_t)16U; - uint32_t rem2 = len1 % (uint32_t)16U; - for (uint32_t i = (uint32_t)0U; i < nb; i++) { - uint8_t *block = t10 + i * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint64_t u0 = load64_le(block); - uint64_t lo = u0; - uint64_t u = load64_le(block + (uint32_t)8U); - uint64_t hi = u; - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_and(f0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f110 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f01 = f010; - Lib_IntVector_Intrinsics_vec256 f111 = f110; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f41 = f40; - e[0U] = f01; - e[1U] = f111; - e[2U] = f2; - e[3U] = f3; - e[4U] = f41; - uint64_t b = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); - Lib_IntVector_Intrinsics_vec256 *r1 = pre0; - Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; - Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); - Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); - Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); - Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); - Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); - Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); - Lib_IntVector_Intrinsics_vec256 - a06 = - Lib_IntVector_Intrinsics_vec256_add64(a05, - Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); - Lib_IntVector_Intrinsics_vec256 - a16 = - Lib_IntVector_Intrinsics_vec256_add64(a15, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); - Lib_IntVector_Intrinsics_vec256 - a26 = - Lib_IntVector_Intrinsics_vec256_add64(a25, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); - Lib_IntVector_Intrinsics_vec256 - a36 = - Lib_IntVector_Intrinsics_vec256_add64(a35, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); - Lib_IntVector_Intrinsics_vec256 - a46 = - Lib_IntVector_Intrinsics_vec256_add64(a45, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); - Lib_IntVector_Intrinsics_vec256 t01 = a06; - Lib_IntVector_Intrinsics_vec256 t11 = a16; - Lib_IntVector_Intrinsics_vec256 t2 = a26; - Lib_IntVector_Intrinsics_vec256 t3 = a36; - Lib_IntVector_Intrinsics_vec256 t4 = a46; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - acc0[0U] = o0; - acc0[1U] = o1; - acc0[2U] = o2; - acc0[3U] = o3; - acc0[4U] = o4; - } - if (rem2 > (uint32_t)0U) { - uint8_t *last1 = t10 + nb * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last1, rem2 * sizeof(last1[0U])); - uint64_t u0 = load64_le(tmp); - uint64_t lo = u0; - uint64_t u = load64_le(tmp + (uint32_t)8U); - uint64_t hi = u; - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_and(f0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f110 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f01 = f010; - Lib_IntVector_Intrinsics_vec256 f111 = f110; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f4 = f40; - e[0U] = f01; - e[1U] = f111; - e[2U] = f2; - e[3U] = f3; - e[4U] = f4; - uint64_t b = (uint64_t)1U << rem2 * (uint32_t)8U % (uint32_t)26U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 fi = e[rem2 * (uint32_t)8U / (uint32_t)26U]; - e[rem2 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); - Lib_IntVector_Intrinsics_vec256 *r1 = pre0; - Lib_IntVector_Intrinsics_vec256 *r5 = pre0 + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; - Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = acc0[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = acc0[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = acc0[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = acc0[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = acc0[4U]; - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); - Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); - Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); - Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); - Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); - Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); - Lib_IntVector_Intrinsics_vec256 - a06 = - Lib_IntVector_Intrinsics_vec256_add64(a05, - Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); - Lib_IntVector_Intrinsics_vec256 - a16 = - Lib_IntVector_Intrinsics_vec256_add64(a15, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); - Lib_IntVector_Intrinsics_vec256 - a26 = - Lib_IntVector_Intrinsics_vec256_add64(a25, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); - Lib_IntVector_Intrinsics_vec256 - a36 = - Lib_IntVector_Intrinsics_vec256_add64(a35, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); - Lib_IntVector_Intrinsics_vec256 - a46 = - Lib_IntVector_Intrinsics_vec256_add64(a45, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); - Lib_IntVector_Intrinsics_vec256 t01 = a06; - Lib_IntVector_Intrinsics_vec256 t11 = a16; - Lib_IntVector_Intrinsics_vec256 t2 = a26; - Lib_IntVector_Intrinsics_vec256 t3 = a36; - Lib_IntVector_Intrinsics_vec256 t4 = a46; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - acc0[0U] = o0; - acc0[1U] = o1; - acc0[2U] = o2; - acc0[3U] = o3; - acc0[4U] = o4; - } - uint8_t tmp[16U] = { 0U }; - memcpy(tmp, rem1, r * sizeof(rem1[0U])); - if (r > (uint32_t)0U) { - Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 *acc = ctx; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint64_t u0 = load64_le(tmp); - uint64_t lo = u0; - uint64_t u = load64_le(tmp + (uint32_t)8U); - uint64_t hi = u; - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_and(f0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f110 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f01 = f010; - Lib_IntVector_Intrinsics_vec256 f111 = f110; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f41 = f40; - e[0U] = f01; - e[1U] = f111; - e[2U] = f2; - e[3U] = f3; - e[4U] = f41; - uint64_t b = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); - Lib_IntVector_Intrinsics_vec256 *r1 = pre; - Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 r0 = r1[0U]; - Lib_IntVector_Intrinsics_vec256 r11 = r1[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = r1[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = r1[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = r1[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); - Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); - Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r11, a01); - Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); - Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); - Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a11)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a21)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r11, a31)); - Lib_IntVector_Intrinsics_vec256 - a06 = - Lib_IntVector_Intrinsics_vec256_add64(a05, - Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); - Lib_IntVector_Intrinsics_vec256 - a16 = - Lib_IntVector_Intrinsics_vec256_add64(a15, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); - Lib_IntVector_Intrinsics_vec256 - a26 = - Lib_IntVector_Intrinsics_vec256_add64(a25, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); - Lib_IntVector_Intrinsics_vec256 - a36 = - Lib_IntVector_Intrinsics_vec256_add64(a35, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); - Lib_IntVector_Intrinsics_vec256 - a46 = - Lib_IntVector_Intrinsics_vec256_add64(a45, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); - Lib_IntVector_Intrinsics_vec256 t0 = a06; - Lib_IntVector_Intrinsics_vec256 t1 = a16; - Lib_IntVector_Intrinsics_vec256 t2 = a26; - Lib_IntVector_Intrinsics_vec256 t3 = a36; - Lib_IntVector_Intrinsics_vec256 t4 = a46; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - acc[0U] = o0; - acc[1U] = o1; - acc[2U] = o2; - acc[3U] = o3; - acc[4U] = o4; - return; - } -} - -static inline void -poly1305_do_256( - uint8_t *k, - uint32_t aadlen, - uint8_t *aad, - uint32_t mlen, - uint8_t *m, - uint8_t *out) -{ - Lib_IntVector_Intrinsics_vec256 ctx[25U]; - for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint8_t block[16U] = { 0U }; - Hacl_Poly1305_256_poly1305_init(ctx, k); - poly1305_padded_256(ctx, aadlen, aad); - poly1305_padded_256(ctx, mlen, m); - store64_le(block, (uint64_t)aadlen); - store64_le(block + (uint32_t)8U, (uint64_t)mlen); - Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 *acc = ctx; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint64_t u0 = load64_le(block); - uint64_t lo = u0; - uint64_t u = load64_le(block + (uint32_t)8U); - uint64_t hi = u; - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_and(f0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f110 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f01 = f010; - Lib_IntVector_Intrinsics_vec256 f111 = f110; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f41 = f40; - e[0U] = f01; - e[1U] = f111; - e[2U] = f2; - e[3U] = f3; - e[4U] = f41; - uint64_t b = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); - Lib_IntVector_Intrinsics_vec256 *r = pre; - Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; - Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); - Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); - Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); - Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); - Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); - Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); - Lib_IntVector_Intrinsics_vec256 - a06 = - Lib_IntVector_Intrinsics_vec256_add64(a05, - Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); - Lib_IntVector_Intrinsics_vec256 - a16 = - Lib_IntVector_Intrinsics_vec256_add64(a15, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); - Lib_IntVector_Intrinsics_vec256 - a26 = - Lib_IntVector_Intrinsics_vec256_add64(a25, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); - Lib_IntVector_Intrinsics_vec256 - a36 = - Lib_IntVector_Intrinsics_vec256_add64(a35, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); - Lib_IntVector_Intrinsics_vec256 - a46 = - Lib_IntVector_Intrinsics_vec256_add64(a45, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); - Lib_IntVector_Intrinsics_vec256 t0 = a06; - Lib_IntVector_Intrinsics_vec256 t1 = a16; - Lib_IntVector_Intrinsics_vec256 t2 = a26; - Lib_IntVector_Intrinsics_vec256 t3 = a36; - Lib_IntVector_Intrinsics_vec256 t4 = a46; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - acc[0U] = o0; - acc[1U] = o1; - acc[2U] = o2; - acc[3U] = o3; - acc[4U] = o4; - Hacl_Poly1305_256_poly1305_finish(out, k, ctx); -} - -void -Hacl_Chacha20Poly1305_256_aead_encrypt( - uint8_t *k, - uint8_t *n1, - uint32_t aadlen, - uint8_t *aad, - uint32_t mlen, - uint8_t *m, - uint8_t *cipher, - uint8_t *mac) -{ - Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, cipher, m, k, n1, (uint32_t)1U); - uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n1, (uint32_t)0U); - uint8_t *key = tmp; - poly1305_do_256(key, aadlen, aad, mlen, cipher, mac); -} - -uint32_t -Hacl_Chacha20Poly1305_256_aead_decrypt( - uint8_t *k, - uint8_t *n1, - uint32_t aadlen, - uint8_t *aad, - uint32_t mlen, - uint8_t *m, - uint8_t *cipher, - uint8_t *mac) -{ - uint8_t computed_mac[16U] = { 0U }; - uint8_t tmp[64U] = { 0U }; - Hacl_Chacha20_Vec256_chacha20_encrypt_256((uint32_t)64U, tmp, tmp, k, n1, (uint32_t)0U); - uint8_t *key = tmp; - poly1305_do_256(key, aadlen, aad, mlen, cipher, computed_mac); - uint8_t res = (uint8_t)255U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - uint8_t uu____0 = FStar_UInt8_eq_mask(computed_mac[i], mac[i]); - res = uu____0 & res; - } - uint8_t z = res; - if (z == (uint8_t)255U) { - Hacl_Chacha20_Vec256_chacha20_encrypt_256(mlen, m, cipher, k, n1, (uint32_t)1U); - return (uint32_t)0U; - } - return (uint32_t)1U; -} diff --git a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h b/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h deleted file mode 100644 index a9bb99f4f..000000000 --- a/lib/freebl/verified/Hacl_Chacha20Poly1305_256.h +++ /dev/null @@ -1,60 +0,0 @@ -/* MIT License - * - * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" -#include <string.h> -#include <stdbool.h> - -#ifndef __Hacl_Chacha20Poly1305_256_H -#define __Hacl_Chacha20Poly1305_256_H - -#include "Hacl_Kremlib.h" -#include "Hacl_Chacha20_Vec256.h" -#include "Hacl_Poly1305_256.h" - -void -Hacl_Chacha20Poly1305_256_aead_encrypt( - uint8_t *k, - uint8_t *n1, - uint32_t aadlen, - uint8_t *aad, - uint32_t mlen, - uint8_t *m, - uint8_t *cipher, - uint8_t *mac); - -uint32_t -Hacl_Chacha20Poly1305_256_aead_decrypt( - uint8_t *k, - uint8_t *n1, - uint32_t aadlen, - uint8_t *aad, - uint32_t mlen, - uint8_t *m, - uint8_t *cipher, - uint8_t *mac); - -#define __Hacl_Chacha20Poly1305_256_H_DEFINED -#endif diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec256.c b/lib/freebl/verified/Hacl_Chacha20_Vec256.c deleted file mode 100644 index 2e80c351f..000000000 --- a/lib/freebl/verified/Hacl_Chacha20_Vec256.c +++ /dev/null @@ -1,876 +0,0 @@ -/* MIT License - * - * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "Hacl_Chacha20_Vec256.h" - -static inline void -double_round_256(Lib_IntVector_Intrinsics_vec256 *st) -{ - st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]); - Lib_IntVector_Intrinsics_vec256 std = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]); - st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std, (uint32_t)16U); - st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]); - Lib_IntVector_Intrinsics_vec256 std0 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]); - st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std0, (uint32_t)12U); - st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[4U]); - Lib_IntVector_Intrinsics_vec256 std1 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[0U]); - st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std1, (uint32_t)8U); - st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[12U]); - Lib_IntVector_Intrinsics_vec256 std2 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[8U]); - st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std2, (uint32_t)7U); - st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]); - Lib_IntVector_Intrinsics_vec256 std3 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]); - st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std3, (uint32_t)16U); - st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]); - Lib_IntVector_Intrinsics_vec256 std4 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]); - st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std4, (uint32_t)12U); - st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[5U]); - Lib_IntVector_Intrinsics_vec256 std5 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[1U]); - st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std5, (uint32_t)8U); - st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[13U]); - Lib_IntVector_Intrinsics_vec256 std6 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[9U]); - st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std6, (uint32_t)7U); - st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]); - Lib_IntVector_Intrinsics_vec256 std7 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]); - st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std7, (uint32_t)16U); - st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]); - Lib_IntVector_Intrinsics_vec256 std8 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]); - st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std8, (uint32_t)12U); - st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[6U]); - Lib_IntVector_Intrinsics_vec256 std9 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[2U]); - st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std9, (uint32_t)8U); - st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[14U]); - Lib_IntVector_Intrinsics_vec256 std10 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[10U]); - st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std10, (uint32_t)7U); - st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]); - Lib_IntVector_Intrinsics_vec256 std11 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]); - st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std11, (uint32_t)16U); - st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]); - Lib_IntVector_Intrinsics_vec256 std12 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]); - st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std12, (uint32_t)12U); - st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[7U]); - Lib_IntVector_Intrinsics_vec256 std13 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[3U]); - st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std13, (uint32_t)8U); - st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[15U]); - Lib_IntVector_Intrinsics_vec256 std14 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[11U]); - st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std14, (uint32_t)7U); - st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]); - Lib_IntVector_Intrinsics_vec256 std15 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]); - st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std15, (uint32_t)16U); - st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]); - Lib_IntVector_Intrinsics_vec256 std16 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]); - st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std16, (uint32_t)12U); - st[0U] = Lib_IntVector_Intrinsics_vec256_add32(st[0U], st[5U]); - Lib_IntVector_Intrinsics_vec256 std17 = Lib_IntVector_Intrinsics_vec256_xor(st[15U], st[0U]); - st[15U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std17, (uint32_t)8U); - st[10U] = Lib_IntVector_Intrinsics_vec256_add32(st[10U], st[15U]); - Lib_IntVector_Intrinsics_vec256 std18 = Lib_IntVector_Intrinsics_vec256_xor(st[5U], st[10U]); - st[5U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std18, (uint32_t)7U); - st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]); - Lib_IntVector_Intrinsics_vec256 std19 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]); - st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std19, (uint32_t)16U); - st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]); - Lib_IntVector_Intrinsics_vec256 std20 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]); - st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std20, (uint32_t)12U); - st[1U] = Lib_IntVector_Intrinsics_vec256_add32(st[1U], st[6U]); - Lib_IntVector_Intrinsics_vec256 std21 = Lib_IntVector_Intrinsics_vec256_xor(st[12U], st[1U]); - st[12U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std21, (uint32_t)8U); - st[11U] = Lib_IntVector_Intrinsics_vec256_add32(st[11U], st[12U]); - Lib_IntVector_Intrinsics_vec256 std22 = Lib_IntVector_Intrinsics_vec256_xor(st[6U], st[11U]); - st[6U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std22, (uint32_t)7U); - st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]); - Lib_IntVector_Intrinsics_vec256 std23 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]); - st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std23, (uint32_t)16U); - st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]); - Lib_IntVector_Intrinsics_vec256 std24 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]); - st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std24, (uint32_t)12U); - st[2U] = Lib_IntVector_Intrinsics_vec256_add32(st[2U], st[7U]); - Lib_IntVector_Intrinsics_vec256 std25 = Lib_IntVector_Intrinsics_vec256_xor(st[13U], st[2U]); - st[13U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std25, (uint32_t)8U); - st[8U] = Lib_IntVector_Intrinsics_vec256_add32(st[8U], st[13U]); - Lib_IntVector_Intrinsics_vec256 std26 = Lib_IntVector_Intrinsics_vec256_xor(st[7U], st[8U]); - st[7U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std26, (uint32_t)7U); - st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]); - Lib_IntVector_Intrinsics_vec256 std27 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]); - st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std27, (uint32_t)16U); - st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]); - Lib_IntVector_Intrinsics_vec256 std28 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]); - st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std28, (uint32_t)12U); - st[3U] = Lib_IntVector_Intrinsics_vec256_add32(st[3U], st[4U]); - Lib_IntVector_Intrinsics_vec256 std29 = Lib_IntVector_Intrinsics_vec256_xor(st[14U], st[3U]); - st[14U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std29, (uint32_t)8U); - st[9U] = Lib_IntVector_Intrinsics_vec256_add32(st[9U], st[14U]); - Lib_IntVector_Intrinsics_vec256 std30 = Lib_IntVector_Intrinsics_vec256_xor(st[4U], st[9U]); - st[4U] = Lib_IntVector_Intrinsics_vec256_rotate_left32(std30, (uint32_t)7U); -} - -static inline void -chacha20_core_256( - Lib_IntVector_Intrinsics_vec256 *k, - Lib_IntVector_Intrinsics_vec256 *ctx, - uint32_t ctr) -{ - memcpy(k, ctx, (uint32_t)16U * sizeof(ctx[0U])); - uint32_t ctr_u32 = (uint32_t)8U * ctr; - Lib_IntVector_Intrinsics_vec256 cv = Lib_IntVector_Intrinsics_vec256_load32(ctr_u32); - k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); - double_round_256(k); - double_round_256(k); - double_round_256(k); - double_round_256(k); - double_round_256(k); - double_round_256(k); - double_round_256(k); - double_round_256(k); - double_round_256(k); - double_round_256(k); - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 *os = k; - Lib_IntVector_Intrinsics_vec256 x = Lib_IntVector_Intrinsics_vec256_add32(k[i], ctx[i]); - os[i] = x; - } - k[12U] = Lib_IntVector_Intrinsics_vec256_add32(k[12U], cv); -} - -static inline void -chacha20_init_256(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *k, uint8_t *n1, uint32_t ctr) -{ - uint32_t ctx1[16U] = { 0U }; - uint32_t *uu____0 = ctx1; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)4U; i++) { - uint32_t *os = uu____0; - uint32_t x = Hacl_Impl_Chacha20_Vec_chacha20_constants[i]; - os[i] = x; - } - uint32_t *uu____1 = ctx1 + (uint32_t)4U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)8U; i++) { - uint32_t *os = uu____1; - uint8_t *bj = k + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x; - } - ctx1[12U] = ctr; - uint32_t *uu____2 = ctx1 + (uint32_t)13U; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)3U; i++) { - uint32_t *os = uu____2; - uint8_t *bj = n1 + i * (uint32_t)4U; - uint32_t u = load32_le(bj); - uint32_t r = u; - uint32_t x = r; - os[i] = x; - } - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 *os = ctx; - uint32_t x = ctx1[i]; - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_load32(x); - os[i] = x0; - } - Lib_IntVector_Intrinsics_vec256 - ctr1 = - Lib_IntVector_Intrinsics_vec256_load32s((uint32_t)0U, - (uint32_t)1U, - (uint32_t)2U, - (uint32_t)3U, - (uint32_t)4U, - (uint32_t)5U, - (uint32_t)6U, - (uint32_t)7U); - Lib_IntVector_Intrinsics_vec256 c12 = ctx[12U]; - ctx[12U] = Lib_IntVector_Intrinsics_vec256_add32(c12, ctr1); -} - -void -Hacl_Chacha20_Vec256_chacha20_encrypt_256( - uint32_t len, - uint8_t *out, - uint8_t *text, - uint8_t *key, - uint8_t *n1, - uint32_t ctr) -{ - Lib_IntVector_Intrinsics_vec256 ctx[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; - chacha20_init_256(ctx, key, n1, ctr); - uint32_t rem1 = len % (uint32_t)512U; - uint32_t nb = len / (uint32_t)512U; - uint32_t rem2 = len % (uint32_t)512U; - for (uint32_t i = (uint32_t)0U; i < nb; i++) { - uint8_t *uu____0 = out + i * (uint32_t)512U; - uint8_t *uu____1 = text + i * (uint32_t)512U; - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; - chacha20_core_256(k, ctx, i); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; - Lib_IntVector_Intrinsics_vec256 - v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; - Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; - k[0U] = v0; - k[1U] = v8; - k[2U] = v1; - k[3U] = v9; - k[4U] = v2; - k[5U] = v10; - k[6U] = v3; - k[7U] = v11; - k[8U] = v4; - k[9U] = v12; - k[10U] = v5; - k[11U] = v13; - k[12U] = v6; - k[13U] = v14; - k[14U] = v7; - k[15U] = v15; - for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(uu____1 + i0 * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec256_store_le(uu____0 + i0 * (uint32_t)32U, y); - } - } - if (rem2 > (uint32_t)0U) { - uint8_t *uu____2 = out + nb * (uint32_t)512U; - uint8_t *uu____3 = text + nb * (uint32_t)512U; - uint8_t plain[512U] = { 0U }; - memcpy(plain, uu____3, rem1 * sizeof(uu____3[0U])); - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; - chacha20_core_256(k, ctx, nb); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; - Lib_IntVector_Intrinsics_vec256 - v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; - Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; - k[0U] = v0; - k[1U] = v8; - k[2U] = v1; - k[3U] = v9; - k[4U] = v2; - k[5U] = v10; - k[6U] = v3; - k[7U] = v11; - k[8U] = v4; - k[9U] = v12; - k[10U] = v5; - k[11U] = v13; - k[12U] = v6; - k[13U] = v14; - k[14U] = v7; - k[15U] = v15; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(plain + i * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec256_store_le(plain + i * (uint32_t)32U, y); - } - memcpy(uu____2, plain, rem1 * sizeof(plain[0U])); - } -} - -void -Hacl_Chacha20_Vec256_chacha20_decrypt_256( - uint32_t len, - uint8_t *out, - uint8_t *cipher, - uint8_t *key, - uint8_t *n1, - uint32_t ctr) -{ - Lib_IntVector_Intrinsics_vec256 ctx[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; - chacha20_init_256(ctx, key, n1, ctr); - uint32_t rem1 = len % (uint32_t)512U; - uint32_t nb = len / (uint32_t)512U; - uint32_t rem2 = len % (uint32_t)512U; - for (uint32_t i = (uint32_t)0U; i < nb; i++) { - uint8_t *uu____0 = out + i * (uint32_t)512U; - uint8_t *uu____1 = cipher + i * (uint32_t)512U; - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; - chacha20_core_256(k, ctx, i); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; - Lib_IntVector_Intrinsics_vec256 - v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; - Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; - k[0U] = v0; - k[1U] = v8; - k[2U] = v1; - k[3U] = v9; - k[4U] = v2; - k[5U] = v10; - k[6U] = v3; - k[7U] = v11; - k[8U] = v4; - k[9U] = v12; - k[10U] = v5; - k[11U] = v13; - k[12U] = v6; - k[13U] = v14; - k[14U] = v7; - k[15U] = v15; - for (uint32_t i0 = (uint32_t)0U; i0 < (uint32_t)16U; i0++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(uu____1 + i0 * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i0]); - Lib_IntVector_Intrinsics_vec256_store_le(uu____0 + i0 * (uint32_t)32U, y); - } - } - if (rem2 > (uint32_t)0U) { - uint8_t *uu____2 = out + nb * (uint32_t)512U; - uint8_t *uu____3 = cipher + nb * (uint32_t)512U; - uint8_t plain[512U] = { 0U }; - memcpy(plain, uu____3, rem1 * sizeof(uu____3[0U])); - Lib_IntVector_Intrinsics_vec256 k[16U]; - for (uint32_t _i = 0U; _i < (uint32_t)16U; ++_i) - k[_i] = Lib_IntVector_Intrinsics_vec256_zero; - chacha20_core_256(k, ctx, nb); - Lib_IntVector_Intrinsics_vec256 v00 = k[0U]; - Lib_IntVector_Intrinsics_vec256 v16 = k[1U]; - Lib_IntVector_Intrinsics_vec256 v20 = k[2U]; - Lib_IntVector_Intrinsics_vec256 v30 = k[3U]; - Lib_IntVector_Intrinsics_vec256 v40 = k[4U]; - Lib_IntVector_Intrinsics_vec256 v50 = k[5U]; - Lib_IntVector_Intrinsics_vec256 v60 = k[6U]; - Lib_IntVector_Intrinsics_vec256 v70 = k[7U]; - Lib_IntVector_Intrinsics_vec256 - v0_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v1_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v00, v16); - Lib_IntVector_Intrinsics_vec256 - v2_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v3_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v20, v30); - Lib_IntVector_Intrinsics_vec256 - v4_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v5_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v40, v50); - Lib_IntVector_Intrinsics_vec256 - v6_ = Lib_IntVector_Intrinsics_vec256_interleave_low32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v7_ = Lib_IntVector_Intrinsics_vec256_interleave_high32(v60, v70); - Lib_IntVector_Intrinsics_vec256 - v0__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v1__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_, v2_); - Lib_IntVector_Intrinsics_vec256 - v2__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v3__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_, v3_); - Lib_IntVector_Intrinsics_vec256 - v4__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v5__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_, v6_); - Lib_IntVector_Intrinsics_vec256 - v6__ = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v7__ = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_, v7_); - Lib_IntVector_Intrinsics_vec256 - v0___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v1___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__, v4__); - Lib_IntVector_Intrinsics_vec256 - v2___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v3___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__, v5__); - Lib_IntVector_Intrinsics_vec256 - v4___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v5___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__, v6__); - Lib_IntVector_Intrinsics_vec256 - v6___ = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 - v7___ = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__, v7__); - Lib_IntVector_Intrinsics_vec256 v0 = v0___; - Lib_IntVector_Intrinsics_vec256 v1 = v2___; - Lib_IntVector_Intrinsics_vec256 v2 = v4___; - Lib_IntVector_Intrinsics_vec256 v3 = v6___; - Lib_IntVector_Intrinsics_vec256 v4 = v1___; - Lib_IntVector_Intrinsics_vec256 v5 = v3___; - Lib_IntVector_Intrinsics_vec256 v6 = v5___; - Lib_IntVector_Intrinsics_vec256 v7 = v7___; - Lib_IntVector_Intrinsics_vec256 v01 = k[8U]; - Lib_IntVector_Intrinsics_vec256 v110 = k[9U]; - Lib_IntVector_Intrinsics_vec256 v21 = k[10U]; - Lib_IntVector_Intrinsics_vec256 v31 = k[11U]; - Lib_IntVector_Intrinsics_vec256 v41 = k[12U]; - Lib_IntVector_Intrinsics_vec256 v51 = k[13U]; - Lib_IntVector_Intrinsics_vec256 v61 = k[14U]; - Lib_IntVector_Intrinsics_vec256 v71 = k[15U]; - Lib_IntVector_Intrinsics_vec256 - v0_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v1_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v01, v110); - Lib_IntVector_Intrinsics_vec256 - v2_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v3_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v21, v31); - Lib_IntVector_Intrinsics_vec256 - v4_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v5_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v41, v51); - Lib_IntVector_Intrinsics_vec256 - v6_0 = Lib_IntVector_Intrinsics_vec256_interleave_low32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v7_0 = Lib_IntVector_Intrinsics_vec256_interleave_high32(v61, v71); - Lib_IntVector_Intrinsics_vec256 - v0__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v1__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v0_0, v2_0); - Lib_IntVector_Intrinsics_vec256 - v2__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v3__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v1_0, v3_0); - Lib_IntVector_Intrinsics_vec256 - v4__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v5__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v4_0, v6_0); - Lib_IntVector_Intrinsics_vec256 - v6__0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v7__0 = Lib_IntVector_Intrinsics_vec256_interleave_high64(v5_0, v7_0); - Lib_IntVector_Intrinsics_vec256 - v0___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v1___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v0__0, v4__0); - Lib_IntVector_Intrinsics_vec256 - v2___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v3___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v1__0, v5__0); - Lib_IntVector_Intrinsics_vec256 - v4___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v5___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v2__0, v6__0); - Lib_IntVector_Intrinsics_vec256 - v6___0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 - v7___0 = Lib_IntVector_Intrinsics_vec256_interleave_high128(v3__0, v7__0); - Lib_IntVector_Intrinsics_vec256 v8 = v0___0; - Lib_IntVector_Intrinsics_vec256 v9 = v2___0; - Lib_IntVector_Intrinsics_vec256 v10 = v4___0; - Lib_IntVector_Intrinsics_vec256 v11 = v6___0; - Lib_IntVector_Intrinsics_vec256 v12 = v1___0; - Lib_IntVector_Intrinsics_vec256 v13 = v3___0; - Lib_IntVector_Intrinsics_vec256 v14 = v5___0; - Lib_IntVector_Intrinsics_vec256 v15 = v7___0; - k[0U] = v0; - k[1U] = v8; - k[2U] = v1; - k[3U] = v9; - k[4U] = v2; - k[5U] = v10; - k[6U] = v3; - k[7U] = v11; - k[8U] = v4; - k[9U] = v12; - k[10U] = v5; - k[11U] = v13; - k[12U] = v6; - k[13U] = v14; - k[14U] = v7; - k[15U] = v15; - for (uint32_t i = (uint32_t)0U; i < (uint32_t)16U; i++) { - Lib_IntVector_Intrinsics_vec256 - x = Lib_IntVector_Intrinsics_vec256_load_le(plain + i * (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 y = Lib_IntVector_Intrinsics_vec256_xor(x, k[i]); - Lib_IntVector_Intrinsics_vec256_store_le(plain + i * (uint32_t)32U, y); - } - memcpy(uu____2, plain, rem1 * sizeof(plain[0U])); - } -} diff --git a/lib/freebl/verified/Hacl_Chacha20_Vec256.h b/lib/freebl/verified/Hacl_Chacha20_Vec256.h deleted file mode 100644 index 59e295e8b..000000000 --- a/lib/freebl/verified/Hacl_Chacha20_Vec256.h +++ /dev/null @@ -1,55 +0,0 @@ -/* MIT License - * - * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" -#include <string.h> -#include <stdbool.h> - -#ifndef __Hacl_Chacha20_Vec256_H -#define __Hacl_Chacha20_Vec256_H - -#include "Hacl_Chacha20.h" -#include "Hacl_Kremlib.h" - -void -Hacl_Chacha20_Vec256_chacha20_encrypt_256( - uint32_t len, - uint8_t *out, - uint8_t *text, - uint8_t *key, - uint8_t *n1, - uint32_t ctr); - -void -Hacl_Chacha20_Vec256_chacha20_decrypt_256( - uint32_t len, - uint8_t *out, - uint8_t *cipher, - uint8_t *key, - uint8_t *n1, - uint32_t ctr); - -#define __Hacl_Chacha20_Vec256_H_DEFINED -#endif diff --git a/lib/freebl/verified/Hacl_Poly1305_256.c b/lib/freebl/verified/Hacl_Poly1305_256.c deleted file mode 100644 index afe9a4f46..000000000 --- a/lib/freebl/verified/Hacl_Poly1305_256.c +++ /dev/null @@ -1,2120 +0,0 @@ -/* MIT License - * - * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "Hacl_Poly1305_256.h" - -inline void -Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b) -{ - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(b); - Lib_IntVector_Intrinsics_vec256 - hi = Lib_IntVector_Intrinsics_vec256_load_le(b + (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); - Lib_IntVector_Intrinsics_vec256 - m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); - Lib_IntVector_Intrinsics_vec256 - m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); - Lib_IntVector_Intrinsics_vec256 - m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); - Lib_IntVector_Intrinsics_vec256 m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); - Lib_IntVector_Intrinsics_vec256 t0 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); - Lib_IntVector_Intrinsics_vec256 t3 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); - Lib_IntVector_Intrinsics_vec256 - t2 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)4U); - Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t2, mask261); - Lib_IntVector_Intrinsics_vec256 - t1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t1, mask261); - Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t0, mask261); - Lib_IntVector_Intrinsics_vec256 - t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)30U); - Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask261); - Lib_IntVector_Intrinsics_vec256 - o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 o0 = o5; - Lib_IntVector_Intrinsics_vec256 o1 = o10; - Lib_IntVector_Intrinsics_vec256 o2 = o20; - Lib_IntVector_Intrinsics_vec256 o3 = o30; - Lib_IntVector_Intrinsics_vec256 o4 = o40; - e[0U] = o0; - e[1U] = o1; - e[2U] = o2; - e[3U] = o3; - e[4U] = o4; - uint64_t b1 = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b1); - Lib_IntVector_Intrinsics_vec256 f40 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f40, mask); - Lib_IntVector_Intrinsics_vec256 acc0 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 acc1 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 acc2 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 acc3 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 acc4 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 e0 = e[0U]; - Lib_IntVector_Intrinsics_vec256 e1 = e[1U]; - Lib_IntVector_Intrinsics_vec256 e2 = e[2U]; - Lib_IntVector_Intrinsics_vec256 e3 = e[3U]; - Lib_IntVector_Intrinsics_vec256 e4 = e[4U]; - Lib_IntVector_Intrinsics_vec256 r0 = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 r1 = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 r2 = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 r3 = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 r4 = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 - r01 = - Lib_IntVector_Intrinsics_vec256_insert64(r0, - Lib_IntVector_Intrinsics_vec256_extract64(acc0, (uint32_t)0U), - (uint32_t)0U); - Lib_IntVector_Intrinsics_vec256 - r11 = - Lib_IntVector_Intrinsics_vec256_insert64(r1, - Lib_IntVector_Intrinsics_vec256_extract64(acc1, (uint32_t)0U), - (uint32_t)0U); - Lib_IntVector_Intrinsics_vec256 - r21 = - Lib_IntVector_Intrinsics_vec256_insert64(r2, - Lib_IntVector_Intrinsics_vec256_extract64(acc2, (uint32_t)0U), - (uint32_t)0U); - Lib_IntVector_Intrinsics_vec256 - r31 = - Lib_IntVector_Intrinsics_vec256_insert64(r3, - Lib_IntVector_Intrinsics_vec256_extract64(acc3, (uint32_t)0U), - (uint32_t)0U); - Lib_IntVector_Intrinsics_vec256 - r41 = - Lib_IntVector_Intrinsics_vec256_insert64(r4, - Lib_IntVector_Intrinsics_vec256_extract64(acc4, (uint32_t)0U), - (uint32_t)0U); - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_add64(r01, e0); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_add64(r11, e1); - Lib_IntVector_Intrinsics_vec256 f2 = Lib_IntVector_Intrinsics_vec256_add64(r21, e2); - Lib_IntVector_Intrinsics_vec256 f3 = Lib_IntVector_Intrinsics_vec256_add64(r31, e3); - Lib_IntVector_Intrinsics_vec256 f4 = Lib_IntVector_Intrinsics_vec256_add64(r41, e4); - Lib_IntVector_Intrinsics_vec256 acc01 = f0; - Lib_IntVector_Intrinsics_vec256 acc11 = f1; - Lib_IntVector_Intrinsics_vec256 acc21 = f2; - Lib_IntVector_Intrinsics_vec256 acc31 = f3; - Lib_IntVector_Intrinsics_vec256 acc41 = f4; - acc[0U] = acc01; - acc[1U] = acc11; - acc[2U] = acc21; - acc[3U] = acc31; - acc[4U] = acc41; -} - -inline void -Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( - Lib_IntVector_Intrinsics_vec256 *out, - Lib_IntVector_Intrinsics_vec256 *p) -{ - Lib_IntVector_Intrinsics_vec256 *r = p; - Lib_IntVector_Intrinsics_vec256 *r_5 = p + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 *r4 = p + (uint32_t)10U; - Lib_IntVector_Intrinsics_vec256 a0 = out[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = out[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = out[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = out[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = out[4U]; - Lib_IntVector_Intrinsics_vec256 r10 = r[0U]; - Lib_IntVector_Intrinsics_vec256 r11 = r[1U]; - Lib_IntVector_Intrinsics_vec256 r12 = r[2U]; - Lib_IntVector_Intrinsics_vec256 r13 = r[3U]; - Lib_IntVector_Intrinsics_vec256 r14 = r[4U]; - Lib_IntVector_Intrinsics_vec256 r151 = r_5[1U]; - Lib_IntVector_Intrinsics_vec256 r152 = r_5[2U]; - Lib_IntVector_Intrinsics_vec256 r153 = r_5[3U]; - Lib_IntVector_Intrinsics_vec256 r154 = r_5[4U]; - Lib_IntVector_Intrinsics_vec256 r40 = r4[0U]; - Lib_IntVector_Intrinsics_vec256 r41 = r4[1U]; - Lib_IntVector_Intrinsics_vec256 r42 = r4[2U]; - Lib_IntVector_Intrinsics_vec256 r43 = r4[3U]; - Lib_IntVector_Intrinsics_vec256 r44 = r4[4U]; - Lib_IntVector_Intrinsics_vec256 a010 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r10); - Lib_IntVector_Intrinsics_vec256 a110 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r10); - Lib_IntVector_Intrinsics_vec256 a210 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r10); - Lib_IntVector_Intrinsics_vec256 a310 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r10); - Lib_IntVector_Intrinsics_vec256 a410 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r10); - Lib_IntVector_Intrinsics_vec256 - a020 = - Lib_IntVector_Intrinsics_vec256_add64(a010, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r11)); - Lib_IntVector_Intrinsics_vec256 - a120 = - Lib_IntVector_Intrinsics_vec256_add64(a110, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r11)); - Lib_IntVector_Intrinsics_vec256 - a220 = - Lib_IntVector_Intrinsics_vec256_add64(a210, - Lib_IntVector_Intrinsics_vec256_mul64(r11, r11)); - Lib_IntVector_Intrinsics_vec256 - a320 = - Lib_IntVector_Intrinsics_vec256_add64(a310, - Lib_IntVector_Intrinsics_vec256_mul64(r12, r11)); - Lib_IntVector_Intrinsics_vec256 - a420 = - Lib_IntVector_Intrinsics_vec256_add64(a410, - Lib_IntVector_Intrinsics_vec256_mul64(r13, r11)); - Lib_IntVector_Intrinsics_vec256 - a030 = - Lib_IntVector_Intrinsics_vec256_add64(a020, - Lib_IntVector_Intrinsics_vec256_mul64(r153, r12)); - Lib_IntVector_Intrinsics_vec256 - a130 = - Lib_IntVector_Intrinsics_vec256_add64(a120, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r12)); - Lib_IntVector_Intrinsics_vec256 - a230 = - Lib_IntVector_Intrinsics_vec256_add64(a220, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r12)); - Lib_IntVector_Intrinsics_vec256 - a330 = - Lib_IntVector_Intrinsics_vec256_add64(a320, - Lib_IntVector_Intrinsics_vec256_mul64(r11, r12)); - Lib_IntVector_Intrinsics_vec256 - a430 = - Lib_IntVector_Intrinsics_vec256_add64(a420, - Lib_IntVector_Intrinsics_vec256_mul64(r12, r12)); - Lib_IntVector_Intrinsics_vec256 - a040 = - Lib_IntVector_Intrinsics_vec256_add64(a030, - Lib_IntVector_Intrinsics_vec256_mul64(r152, r13)); - Lib_IntVector_Intrinsics_vec256 - a140 = - Lib_IntVector_Intrinsics_vec256_add64(a130, - Lib_IntVector_Intrinsics_vec256_mul64(r153, r13)); - Lib_IntVector_Intrinsics_vec256 - a240 = - Lib_IntVector_Intrinsics_vec256_add64(a230, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r13)); - Lib_IntVector_Intrinsics_vec256 - a340 = - Lib_IntVector_Intrinsics_vec256_add64(a330, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r13)); - Lib_IntVector_Intrinsics_vec256 - a440 = - Lib_IntVector_Intrinsics_vec256_add64(a430, - Lib_IntVector_Intrinsics_vec256_mul64(r11, r13)); - Lib_IntVector_Intrinsics_vec256 - a050 = - Lib_IntVector_Intrinsics_vec256_add64(a040, - Lib_IntVector_Intrinsics_vec256_mul64(r151, r14)); - Lib_IntVector_Intrinsics_vec256 - a150 = - Lib_IntVector_Intrinsics_vec256_add64(a140, - Lib_IntVector_Intrinsics_vec256_mul64(r152, r14)); - Lib_IntVector_Intrinsics_vec256 - a250 = - Lib_IntVector_Intrinsics_vec256_add64(a240, - Lib_IntVector_Intrinsics_vec256_mul64(r153, r14)); - Lib_IntVector_Intrinsics_vec256 - a350 = - Lib_IntVector_Intrinsics_vec256_add64(a340, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r14)); - Lib_IntVector_Intrinsics_vec256 - a450 = - Lib_IntVector_Intrinsics_vec256_add64(a440, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r14)); - Lib_IntVector_Intrinsics_vec256 t00 = a050; - Lib_IntVector_Intrinsics_vec256 t10 = a150; - Lib_IntVector_Intrinsics_vec256 t20 = a250; - Lib_IntVector_Intrinsics_vec256 t30 = a350; - Lib_IntVector_Intrinsics_vec256 t40 = a450; - Lib_IntVector_Intrinsics_vec256 - mask2610 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask2610); - Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask2610); - Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00); - Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10); - Lib_IntVector_Intrinsics_vec256 - z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5); - Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask2610); - Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask2610); - Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010); - Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12); - Lib_IntVector_Intrinsics_vec256 - z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask2610); - Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask2610); - Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020); - Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130); - Lib_IntVector_Intrinsics_vec256 - z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask2610); - Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030); - Lib_IntVector_Intrinsics_vec256 r20 = x020; - Lib_IntVector_Intrinsics_vec256 r21 = x120; - Lib_IntVector_Intrinsics_vec256 r22 = x210; - Lib_IntVector_Intrinsics_vec256 r23 = x320; - Lib_IntVector_Intrinsics_vec256 r24 = x420; - Lib_IntVector_Intrinsics_vec256 a011 = Lib_IntVector_Intrinsics_vec256_mul64(r10, r20); - Lib_IntVector_Intrinsics_vec256 a111 = Lib_IntVector_Intrinsics_vec256_mul64(r11, r20); - Lib_IntVector_Intrinsics_vec256 a211 = Lib_IntVector_Intrinsics_vec256_mul64(r12, r20); - Lib_IntVector_Intrinsics_vec256 a311 = Lib_IntVector_Intrinsics_vec256_mul64(r13, r20); - Lib_IntVector_Intrinsics_vec256 a411 = Lib_IntVector_Intrinsics_vec256_mul64(r14, r20); - Lib_IntVector_Intrinsics_vec256 - a021 = - Lib_IntVector_Intrinsics_vec256_add64(a011, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r21)); - Lib_IntVector_Intrinsics_vec256 - a121 = - Lib_IntVector_Intrinsics_vec256_add64(a111, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r21)); - Lib_IntVector_Intrinsics_vec256 - a221 = - Lib_IntVector_Intrinsics_vec256_add64(a211, - Lib_IntVector_Intrinsics_vec256_mul64(r11, r21)); - Lib_IntVector_Intrinsics_vec256 - a321 = - Lib_IntVector_Intrinsics_vec256_add64(a311, - Lib_IntVector_Intrinsics_vec256_mul64(r12, r21)); - Lib_IntVector_Intrinsics_vec256 - a421 = - Lib_IntVector_Intrinsics_vec256_add64(a411, - Lib_IntVector_Intrinsics_vec256_mul64(r13, r21)); - Lib_IntVector_Intrinsics_vec256 - a031 = - Lib_IntVector_Intrinsics_vec256_add64(a021, - Lib_IntVector_Intrinsics_vec256_mul64(r153, r22)); - Lib_IntVector_Intrinsics_vec256 - a131 = - Lib_IntVector_Intrinsics_vec256_add64(a121, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r22)); - Lib_IntVector_Intrinsics_vec256 - a231 = - Lib_IntVector_Intrinsics_vec256_add64(a221, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r22)); - Lib_IntVector_Intrinsics_vec256 - a331 = - Lib_IntVector_Intrinsics_vec256_add64(a321, - Lib_IntVector_Intrinsics_vec256_mul64(r11, r22)); - Lib_IntVector_Intrinsics_vec256 - a431 = - Lib_IntVector_Intrinsics_vec256_add64(a421, - Lib_IntVector_Intrinsics_vec256_mul64(r12, r22)); - Lib_IntVector_Intrinsics_vec256 - a041 = - Lib_IntVector_Intrinsics_vec256_add64(a031, - Lib_IntVector_Intrinsics_vec256_mul64(r152, r23)); - Lib_IntVector_Intrinsics_vec256 - a141 = - Lib_IntVector_Intrinsics_vec256_add64(a131, - Lib_IntVector_Intrinsics_vec256_mul64(r153, r23)); - Lib_IntVector_Intrinsics_vec256 - a241 = - Lib_IntVector_Intrinsics_vec256_add64(a231, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r23)); - Lib_IntVector_Intrinsics_vec256 - a341 = - Lib_IntVector_Intrinsics_vec256_add64(a331, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r23)); - Lib_IntVector_Intrinsics_vec256 - a441 = - Lib_IntVector_Intrinsics_vec256_add64(a431, - Lib_IntVector_Intrinsics_vec256_mul64(r11, r23)); - Lib_IntVector_Intrinsics_vec256 - a051 = - Lib_IntVector_Intrinsics_vec256_add64(a041, - Lib_IntVector_Intrinsics_vec256_mul64(r151, r24)); - Lib_IntVector_Intrinsics_vec256 - a151 = - Lib_IntVector_Intrinsics_vec256_add64(a141, - Lib_IntVector_Intrinsics_vec256_mul64(r152, r24)); - Lib_IntVector_Intrinsics_vec256 - a251 = - Lib_IntVector_Intrinsics_vec256_add64(a241, - Lib_IntVector_Intrinsics_vec256_mul64(r153, r24)); - Lib_IntVector_Intrinsics_vec256 - a351 = - Lib_IntVector_Intrinsics_vec256_add64(a341, - Lib_IntVector_Intrinsics_vec256_mul64(r154, r24)); - Lib_IntVector_Intrinsics_vec256 - a451 = - Lib_IntVector_Intrinsics_vec256_add64(a441, - Lib_IntVector_Intrinsics_vec256_mul64(r10, r24)); - Lib_IntVector_Intrinsics_vec256 t01 = a051; - Lib_IntVector_Intrinsics_vec256 t11 = a151; - Lib_IntVector_Intrinsics_vec256 t21 = a251; - Lib_IntVector_Intrinsics_vec256 t31 = a351; - Lib_IntVector_Intrinsics_vec256 t41 = a451; - Lib_IntVector_Intrinsics_vec256 - mask2611 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z04 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z14 = Lib_IntVector_Intrinsics_vec256_shift_right64(t31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x03 = Lib_IntVector_Intrinsics_vec256_and(t01, mask2611); - Lib_IntVector_Intrinsics_vec256 x33 = Lib_IntVector_Intrinsics_vec256_and(t31, mask2611); - Lib_IntVector_Intrinsics_vec256 x13 = Lib_IntVector_Intrinsics_vec256_add64(t11, z04); - Lib_IntVector_Intrinsics_vec256 x43 = Lib_IntVector_Intrinsics_vec256_add64(t41, z14); - Lib_IntVector_Intrinsics_vec256 - z011 = Lib_IntVector_Intrinsics_vec256_shift_right64(x13, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z111 = Lib_IntVector_Intrinsics_vec256_shift_right64(x43, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t6 = Lib_IntVector_Intrinsics_vec256_shift_left64(z111, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z111, t6); - Lib_IntVector_Intrinsics_vec256 x111 = Lib_IntVector_Intrinsics_vec256_and(x13, mask2611); - Lib_IntVector_Intrinsics_vec256 x411 = Lib_IntVector_Intrinsics_vec256_and(x43, mask2611); - Lib_IntVector_Intrinsics_vec256 x22 = Lib_IntVector_Intrinsics_vec256_add64(t21, z011); - Lib_IntVector_Intrinsics_vec256 x011 = Lib_IntVector_Intrinsics_vec256_add64(x03, z120); - Lib_IntVector_Intrinsics_vec256 - z021 = Lib_IntVector_Intrinsics_vec256_shift_right64(x22, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z131 = Lib_IntVector_Intrinsics_vec256_shift_right64(x011, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x211 = Lib_IntVector_Intrinsics_vec256_and(x22, mask2611); - Lib_IntVector_Intrinsics_vec256 x021 = Lib_IntVector_Intrinsics_vec256_and(x011, mask2611); - Lib_IntVector_Intrinsics_vec256 x311 = Lib_IntVector_Intrinsics_vec256_add64(x33, z021); - Lib_IntVector_Intrinsics_vec256 x121 = Lib_IntVector_Intrinsics_vec256_add64(x111, z131); - Lib_IntVector_Intrinsics_vec256 - z031 = Lib_IntVector_Intrinsics_vec256_shift_right64(x311, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x321 = Lib_IntVector_Intrinsics_vec256_and(x311, mask2611); - Lib_IntVector_Intrinsics_vec256 x421 = Lib_IntVector_Intrinsics_vec256_add64(x411, z031); - Lib_IntVector_Intrinsics_vec256 r30 = x021; - Lib_IntVector_Intrinsics_vec256 r31 = x121; - Lib_IntVector_Intrinsics_vec256 r32 = x211; - Lib_IntVector_Intrinsics_vec256 r33 = x321; - Lib_IntVector_Intrinsics_vec256 r34 = x421; - Lib_IntVector_Intrinsics_vec256 - v12120 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r20, r10); - Lib_IntVector_Intrinsics_vec256 - v34340 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r40, r30); - Lib_IntVector_Intrinsics_vec256 - r12340 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34340, v12120); - Lib_IntVector_Intrinsics_vec256 - v12121 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r21, r11); - Lib_IntVector_Intrinsics_vec256 - v34341 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r41, r31); - Lib_IntVector_Intrinsics_vec256 - r12341 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34341, v12121); - Lib_IntVector_Intrinsics_vec256 - v12122 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r22, r12); - Lib_IntVector_Intrinsics_vec256 - v34342 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r42, r32); - Lib_IntVector_Intrinsics_vec256 - r12342 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34342, v12122); - Lib_IntVector_Intrinsics_vec256 - v12123 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r23, r13); - Lib_IntVector_Intrinsics_vec256 - v34343 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r43, r33); - Lib_IntVector_Intrinsics_vec256 - r12343 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34343, v12123); - Lib_IntVector_Intrinsics_vec256 - v12124 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r24, r14); - Lib_IntVector_Intrinsics_vec256 - v34344 = Lib_IntVector_Intrinsics_vec256_interleave_low64(r44, r34); - Lib_IntVector_Intrinsics_vec256 - r12344 = Lib_IntVector_Intrinsics_vec256_interleave_low128(v34344, v12124); - Lib_IntVector_Intrinsics_vec256 - r123451 = Lib_IntVector_Intrinsics_vec256_smul64(r12341, (uint64_t)5U); - Lib_IntVector_Intrinsics_vec256 - r123452 = Lib_IntVector_Intrinsics_vec256_smul64(r12342, (uint64_t)5U); - Lib_IntVector_Intrinsics_vec256 - r123453 = Lib_IntVector_Intrinsics_vec256_smul64(r12343, (uint64_t)5U); - Lib_IntVector_Intrinsics_vec256 - r123454 = Lib_IntVector_Intrinsics_vec256_smul64(r12344, (uint64_t)5U); - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_mul64(r12340, a0); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_mul64(r12341, a0); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_mul64(r12342, a0); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_mul64(r12343, a0); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_mul64(r12344, a0); - Lib_IntVector_Intrinsics_vec256 - a02 = - Lib_IntVector_Intrinsics_vec256_add64(a01, - Lib_IntVector_Intrinsics_vec256_mul64(r123454, a1)); - Lib_IntVector_Intrinsics_vec256 - a12 = - Lib_IntVector_Intrinsics_vec256_add64(a11, - Lib_IntVector_Intrinsics_vec256_mul64(r12340, a1)); - Lib_IntVector_Intrinsics_vec256 - a22 = - Lib_IntVector_Intrinsics_vec256_add64(a21, - Lib_IntVector_Intrinsics_vec256_mul64(r12341, a1)); - Lib_IntVector_Intrinsics_vec256 - a32 = - Lib_IntVector_Intrinsics_vec256_add64(a31, - Lib_IntVector_Intrinsics_vec256_mul64(r12342, a1)); - Lib_IntVector_Intrinsics_vec256 - a42 = - Lib_IntVector_Intrinsics_vec256_add64(a41, - Lib_IntVector_Intrinsics_vec256_mul64(r12343, a1)); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r123453, a2)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r123454, a2)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r12340, a2)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r12341, a2)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r12342, a2)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r123452, a3)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r123453, a3)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r123454, a3)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r12340, a3)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r12341, a3)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r123451, a4)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r123452, a4)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r123453, a4)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r123454, a4)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r12340, a4)); - Lib_IntVector_Intrinsics_vec256 t0 = a05; - Lib_IntVector_Intrinsics_vec256 t1 = a15; - Lib_IntVector_Intrinsics_vec256 t2 = a25; - Lib_IntVector_Intrinsics_vec256 t3 = a35; - Lib_IntVector_Intrinsics_vec256 t4 = a45; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z121 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z121); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o10 = x12; - Lib_IntVector_Intrinsics_vec256 o20 = x21; - Lib_IntVector_Intrinsics_vec256 o30 = x32; - Lib_IntVector_Intrinsics_vec256 o40 = x42; - Lib_IntVector_Intrinsics_vec256 - v00 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o0, o0); - Lib_IntVector_Intrinsics_vec256 v10 = Lib_IntVector_Intrinsics_vec256_add64(o0, v00); - Lib_IntVector_Intrinsics_vec256 - v20 = - Lib_IntVector_Intrinsics_vec256_add64(v10, - Lib_IntVector_Intrinsics_vec256_shuffle64(v10, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U)); - Lib_IntVector_Intrinsics_vec256 - v01 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o10, o10); - Lib_IntVector_Intrinsics_vec256 v11 = Lib_IntVector_Intrinsics_vec256_add64(o10, v01); - Lib_IntVector_Intrinsics_vec256 - v21 = - Lib_IntVector_Intrinsics_vec256_add64(v11, - Lib_IntVector_Intrinsics_vec256_shuffle64(v11, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U)); - Lib_IntVector_Intrinsics_vec256 - v02 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o20, o20); - Lib_IntVector_Intrinsics_vec256 v12 = Lib_IntVector_Intrinsics_vec256_add64(o20, v02); - Lib_IntVector_Intrinsics_vec256 - v22 = - Lib_IntVector_Intrinsics_vec256_add64(v12, - Lib_IntVector_Intrinsics_vec256_shuffle64(v12, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U)); - Lib_IntVector_Intrinsics_vec256 - v03 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o30, o30); - Lib_IntVector_Intrinsics_vec256 v13 = Lib_IntVector_Intrinsics_vec256_add64(o30, v03); - Lib_IntVector_Intrinsics_vec256 - v23 = - Lib_IntVector_Intrinsics_vec256_add64(v13, - Lib_IntVector_Intrinsics_vec256_shuffle64(v13, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U)); - Lib_IntVector_Intrinsics_vec256 - v04 = Lib_IntVector_Intrinsics_vec256_interleave_high128(o40, o40); - Lib_IntVector_Intrinsics_vec256 v14 = Lib_IntVector_Intrinsics_vec256_add64(o40, v04); - Lib_IntVector_Intrinsics_vec256 - v24 = - Lib_IntVector_Intrinsics_vec256_add64(v14, - Lib_IntVector_Intrinsics_vec256_shuffle64(v14, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U, - (uint32_t)1U)); - Lib_IntVector_Intrinsics_vec256 - l = Lib_IntVector_Intrinsics_vec256_add64(v20, Lib_IntVector_Intrinsics_vec256_zero); - Lib_IntVector_Intrinsics_vec256 - tmp0 = - Lib_IntVector_Intrinsics_vec256_and(l, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l0 = Lib_IntVector_Intrinsics_vec256_add64(v21, c0); - Lib_IntVector_Intrinsics_vec256 - tmp1 = - Lib_IntVector_Intrinsics_vec256_and(l0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(v22, c1); - Lib_IntVector_Intrinsics_vec256 - tmp2 = - Lib_IntVector_Intrinsics_vec256_and(l1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(v23, c2); - Lib_IntVector_Intrinsics_vec256 - tmp3 = - Lib_IntVector_Intrinsics_vec256_and(l2, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(v24, c3); - Lib_IntVector_Intrinsics_vec256 - tmp4 = - Lib_IntVector_Intrinsics_vec256_and(l3, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - o00 = - Lib_IntVector_Intrinsics_vec256_add64(tmp0, - Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U)); - Lib_IntVector_Intrinsics_vec256 o1 = tmp1; - Lib_IntVector_Intrinsics_vec256 o2 = tmp2; - Lib_IntVector_Intrinsics_vec256 o3 = tmp3; - Lib_IntVector_Intrinsics_vec256 o4 = tmp4; - out[0U] = o00; - out[1U] = o1; - out[2U] = o2; - out[3U] = o3; - out[4U] = o4; -} - -uint32_t Hacl_Poly1305_256_blocklen = (uint32_t)16U; - -void -Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key) -{ - Lib_IntVector_Intrinsics_vec256 *acc = ctx; - Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; - uint8_t *kr = key; - acc[0U] = Lib_IntVector_Intrinsics_vec256_zero; - acc[1U] = Lib_IntVector_Intrinsics_vec256_zero; - acc[2U] = Lib_IntVector_Intrinsics_vec256_zero; - acc[3U] = Lib_IntVector_Intrinsics_vec256_zero; - acc[4U] = Lib_IntVector_Intrinsics_vec256_zero; - uint64_t u0 = load64_le(kr); - uint64_t lo = u0; - uint64_t u = load64_le(kr + (uint32_t)8U); - uint64_t hi = u; - uint64_t mask0 = (uint64_t)0x0ffffffc0fffffffU; - uint64_t mask1 = (uint64_t)0x0ffffffc0ffffffcU; - uint64_t lo1 = lo & mask0; - uint64_t hi1 = hi & mask1; - Lib_IntVector_Intrinsics_vec256 *r = pre; - Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U; - Lib_IntVector_Intrinsics_vec256 *rn_5 = pre + (uint32_t)15U; - Lib_IntVector_Intrinsics_vec256 r_vec0 = Lib_IntVector_Intrinsics_vec256_load64(lo1); - Lib_IntVector_Intrinsics_vec256 r_vec1 = Lib_IntVector_Intrinsics_vec256_load64(hi1); - Lib_IntVector_Intrinsics_vec256 - f00 = - Lib_IntVector_Intrinsics_vec256_and(r_vec0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f15 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(r_vec1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(r_vec1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f0 = f00; - Lib_IntVector_Intrinsics_vec256 f1 = f15; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f4 = f40; - r[0U] = f0; - r[1U] = f1; - r[2U] = f2; - r[3U] = f3; - r[4U] = f4; - Lib_IntVector_Intrinsics_vec256 f200 = r[0U]; - Lib_IntVector_Intrinsics_vec256 f210 = r[1U]; - Lib_IntVector_Intrinsics_vec256 f220 = r[2U]; - Lib_IntVector_Intrinsics_vec256 f230 = r[3U]; - Lib_IntVector_Intrinsics_vec256 f240 = r[4U]; - r5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f200, (uint64_t)5U); - r5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f210, (uint64_t)5U); - r5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f220, (uint64_t)5U); - r5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f230, (uint64_t)5U); - r5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f240, (uint64_t)5U); - Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; - Lib_IntVector_Intrinsics_vec256 r10 = r[1U]; - Lib_IntVector_Intrinsics_vec256 r20 = r[2U]; - Lib_IntVector_Intrinsics_vec256 r30 = r[3U]; - Lib_IntVector_Intrinsics_vec256 r40 = r[4U]; - Lib_IntVector_Intrinsics_vec256 r510 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r520 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r530 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r540 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f100 = r[0U]; - Lib_IntVector_Intrinsics_vec256 f110 = r[1U]; - Lib_IntVector_Intrinsics_vec256 f120 = r[2U]; - Lib_IntVector_Intrinsics_vec256 f130 = r[3U]; - Lib_IntVector_Intrinsics_vec256 f140 = r[4U]; - Lib_IntVector_Intrinsics_vec256 a00 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f100); - Lib_IntVector_Intrinsics_vec256 a10 = Lib_IntVector_Intrinsics_vec256_mul64(r10, f100); - Lib_IntVector_Intrinsics_vec256 a20 = Lib_IntVector_Intrinsics_vec256_mul64(r20, f100); - Lib_IntVector_Intrinsics_vec256 a30 = Lib_IntVector_Intrinsics_vec256_mul64(r30, f100); - Lib_IntVector_Intrinsics_vec256 a40 = Lib_IntVector_Intrinsics_vec256_mul64(r40, f100); - Lib_IntVector_Intrinsics_vec256 - a010 = - Lib_IntVector_Intrinsics_vec256_add64(a00, - Lib_IntVector_Intrinsics_vec256_mul64(r540, f110)); - Lib_IntVector_Intrinsics_vec256 - a110 = - Lib_IntVector_Intrinsics_vec256_add64(a10, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); - Lib_IntVector_Intrinsics_vec256 - a210 = - Lib_IntVector_Intrinsics_vec256_add64(a20, - Lib_IntVector_Intrinsics_vec256_mul64(r10, f110)); - Lib_IntVector_Intrinsics_vec256 - a310 = - Lib_IntVector_Intrinsics_vec256_add64(a30, - Lib_IntVector_Intrinsics_vec256_mul64(r20, f110)); - Lib_IntVector_Intrinsics_vec256 - a410 = - Lib_IntVector_Intrinsics_vec256_add64(a40, - Lib_IntVector_Intrinsics_vec256_mul64(r30, f110)); - Lib_IntVector_Intrinsics_vec256 - a020 = - Lib_IntVector_Intrinsics_vec256_add64(a010, - Lib_IntVector_Intrinsics_vec256_mul64(r530, f120)); - Lib_IntVector_Intrinsics_vec256 - a120 = - Lib_IntVector_Intrinsics_vec256_add64(a110, - Lib_IntVector_Intrinsics_vec256_mul64(r540, f120)); - Lib_IntVector_Intrinsics_vec256 - a220 = - Lib_IntVector_Intrinsics_vec256_add64(a210, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); - Lib_IntVector_Intrinsics_vec256 - a320 = - Lib_IntVector_Intrinsics_vec256_add64(a310, - Lib_IntVector_Intrinsics_vec256_mul64(r10, f120)); - Lib_IntVector_Intrinsics_vec256 - a420 = - Lib_IntVector_Intrinsics_vec256_add64(a410, - Lib_IntVector_Intrinsics_vec256_mul64(r20, f120)); - Lib_IntVector_Intrinsics_vec256 - a030 = - Lib_IntVector_Intrinsics_vec256_add64(a020, - Lib_IntVector_Intrinsics_vec256_mul64(r520, f130)); - Lib_IntVector_Intrinsics_vec256 - a130 = - Lib_IntVector_Intrinsics_vec256_add64(a120, - Lib_IntVector_Intrinsics_vec256_mul64(r530, f130)); - Lib_IntVector_Intrinsics_vec256 - a230 = - Lib_IntVector_Intrinsics_vec256_add64(a220, - Lib_IntVector_Intrinsics_vec256_mul64(r540, f130)); - Lib_IntVector_Intrinsics_vec256 - a330 = - Lib_IntVector_Intrinsics_vec256_add64(a320, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); - Lib_IntVector_Intrinsics_vec256 - a430 = - Lib_IntVector_Intrinsics_vec256_add64(a420, - Lib_IntVector_Intrinsics_vec256_mul64(r10, f130)); - Lib_IntVector_Intrinsics_vec256 - a040 = - Lib_IntVector_Intrinsics_vec256_add64(a030, - Lib_IntVector_Intrinsics_vec256_mul64(r510, f140)); - Lib_IntVector_Intrinsics_vec256 - a140 = - Lib_IntVector_Intrinsics_vec256_add64(a130, - Lib_IntVector_Intrinsics_vec256_mul64(r520, f140)); - Lib_IntVector_Intrinsics_vec256 - a240 = - Lib_IntVector_Intrinsics_vec256_add64(a230, - Lib_IntVector_Intrinsics_vec256_mul64(r530, f140)); - Lib_IntVector_Intrinsics_vec256 - a340 = - Lib_IntVector_Intrinsics_vec256_add64(a330, - Lib_IntVector_Intrinsics_vec256_mul64(r540, f140)); - Lib_IntVector_Intrinsics_vec256 - a440 = - Lib_IntVector_Intrinsics_vec256_add64(a430, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); - Lib_IntVector_Intrinsics_vec256 t00 = a040; - Lib_IntVector_Intrinsics_vec256 t10 = a140; - Lib_IntVector_Intrinsics_vec256 t20 = a240; - Lib_IntVector_Intrinsics_vec256 t30 = a340; - Lib_IntVector_Intrinsics_vec256 t40 = a440; - Lib_IntVector_Intrinsics_vec256 - mask2610 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z00 = Lib_IntVector_Intrinsics_vec256_shift_right64(t00, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x00 = Lib_IntVector_Intrinsics_vec256_and(t00, mask2610); - Lib_IntVector_Intrinsics_vec256 x30 = Lib_IntVector_Intrinsics_vec256_and(t30, mask2610); - Lib_IntVector_Intrinsics_vec256 x10 = Lib_IntVector_Intrinsics_vec256_add64(t10, z00); - Lib_IntVector_Intrinsics_vec256 x40 = Lib_IntVector_Intrinsics_vec256_add64(t40, z10); - Lib_IntVector_Intrinsics_vec256 - z010 = Lib_IntVector_Intrinsics_vec256_shift_right64(x10, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z110 = Lib_IntVector_Intrinsics_vec256_shift_right64(x40, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t5 = Lib_IntVector_Intrinsics_vec256_shift_left64(z110, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z110, t5); - Lib_IntVector_Intrinsics_vec256 x110 = Lib_IntVector_Intrinsics_vec256_and(x10, mask2610); - Lib_IntVector_Intrinsics_vec256 x410 = Lib_IntVector_Intrinsics_vec256_and(x40, mask2610); - Lib_IntVector_Intrinsics_vec256 x20 = Lib_IntVector_Intrinsics_vec256_add64(t20, z010); - Lib_IntVector_Intrinsics_vec256 x010 = Lib_IntVector_Intrinsics_vec256_add64(x00, z12); - Lib_IntVector_Intrinsics_vec256 - z020 = Lib_IntVector_Intrinsics_vec256_shift_right64(x20, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z130 = Lib_IntVector_Intrinsics_vec256_shift_right64(x010, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x210 = Lib_IntVector_Intrinsics_vec256_and(x20, mask2610); - Lib_IntVector_Intrinsics_vec256 x020 = Lib_IntVector_Intrinsics_vec256_and(x010, mask2610); - Lib_IntVector_Intrinsics_vec256 x310 = Lib_IntVector_Intrinsics_vec256_add64(x30, z020); - Lib_IntVector_Intrinsics_vec256 x120 = Lib_IntVector_Intrinsics_vec256_add64(x110, z130); - Lib_IntVector_Intrinsics_vec256 - z030 = Lib_IntVector_Intrinsics_vec256_shift_right64(x310, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x320 = Lib_IntVector_Intrinsics_vec256_and(x310, mask2610); - Lib_IntVector_Intrinsics_vec256 x420 = Lib_IntVector_Intrinsics_vec256_add64(x410, z030); - Lib_IntVector_Intrinsics_vec256 o00 = x020; - Lib_IntVector_Intrinsics_vec256 o10 = x120; - Lib_IntVector_Intrinsics_vec256 o20 = x210; - Lib_IntVector_Intrinsics_vec256 o30 = x320; - Lib_IntVector_Intrinsics_vec256 o40 = x420; - rn[0U] = o00; - rn[1U] = o10; - rn[2U] = o20; - rn[3U] = o30; - rn[4U] = o40; - Lib_IntVector_Intrinsics_vec256 f201 = rn[0U]; - Lib_IntVector_Intrinsics_vec256 f211 = rn[1U]; - Lib_IntVector_Intrinsics_vec256 f221 = rn[2U]; - Lib_IntVector_Intrinsics_vec256 f231 = rn[3U]; - Lib_IntVector_Intrinsics_vec256 f241 = rn[4U]; - rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f201, (uint64_t)5U); - rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f211, (uint64_t)5U); - rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f221, (uint64_t)5U); - rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f231, (uint64_t)5U); - rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f241, (uint64_t)5U); - Lib_IntVector_Intrinsics_vec256 r00 = rn[0U]; - Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = rn_5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = rn_5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = rn_5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = rn_5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = rn[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = rn[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = rn[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = rn[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = rn[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r00, f10); - Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); - Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); - Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); - Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); - Lib_IntVector_Intrinsics_vec256 - a01 = - Lib_IntVector_Intrinsics_vec256_add64(a0, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f11)); - Lib_IntVector_Intrinsics_vec256 - a11 = - Lib_IntVector_Intrinsics_vec256_add64(a1, - Lib_IntVector_Intrinsics_vec256_mul64(r00, f11)); - Lib_IntVector_Intrinsics_vec256 - a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, Lib_IntVector_Intrinsics_vec256_mul64(r1, f11)); - Lib_IntVector_Intrinsics_vec256 - a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, Lib_IntVector_Intrinsics_vec256_mul64(r2, f11)); - Lib_IntVector_Intrinsics_vec256 - a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, Lib_IntVector_Intrinsics_vec256_mul64(r3, f11)); - Lib_IntVector_Intrinsics_vec256 - a02 = - Lib_IntVector_Intrinsics_vec256_add64(a01, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f12)); - Lib_IntVector_Intrinsics_vec256 - a12 = - Lib_IntVector_Intrinsics_vec256_add64(a11, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f12)); - Lib_IntVector_Intrinsics_vec256 - a22 = - Lib_IntVector_Intrinsics_vec256_add64(a21, - Lib_IntVector_Intrinsics_vec256_mul64(r00, f12)); - Lib_IntVector_Intrinsics_vec256 - a32 = - Lib_IntVector_Intrinsics_vec256_add64(a31, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f12)); - Lib_IntVector_Intrinsics_vec256 - a42 = - Lib_IntVector_Intrinsics_vec256_add64(a41, - Lib_IntVector_Intrinsics_vec256_mul64(r2, f12)); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r52, f13)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f13)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f13)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r00, f13)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f13)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r51, f14)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r52, f14)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f14)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f14)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r00, f14)); - Lib_IntVector_Intrinsics_vec256 t0 = a04; - Lib_IntVector_Intrinsics_vec256 t1 = a14; - Lib_IntVector_Intrinsics_vec256 t2 = a24; - Lib_IntVector_Intrinsics_vec256 t3 = a34; - Lib_IntVector_Intrinsics_vec256 t4 = a44; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z120 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z120); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - rn[0U] = o0; - rn[1U] = o1; - rn[2U] = o2; - rn[3U] = o3; - rn[4U] = o4; - Lib_IntVector_Intrinsics_vec256 f202 = rn[0U]; - Lib_IntVector_Intrinsics_vec256 f21 = rn[1U]; - Lib_IntVector_Intrinsics_vec256 f22 = rn[2U]; - Lib_IntVector_Intrinsics_vec256 f23 = rn[3U]; - Lib_IntVector_Intrinsics_vec256 f24 = rn[4U]; - rn_5[0U] = Lib_IntVector_Intrinsics_vec256_smul64(f202, (uint64_t)5U); - rn_5[1U] = Lib_IntVector_Intrinsics_vec256_smul64(f21, (uint64_t)5U); - rn_5[2U] = Lib_IntVector_Intrinsics_vec256_smul64(f22, (uint64_t)5U); - rn_5[3U] = Lib_IntVector_Intrinsics_vec256_smul64(f23, (uint64_t)5U); - rn_5[4U] = Lib_IntVector_Intrinsics_vec256_smul64(f24, (uint64_t)5U); -} - -void -Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text) -{ - Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 *acc = ctx; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint64_t u0 = load64_le(text); - uint64_t lo = u0; - uint64_t u = load64_le(text + (uint32_t)8U); - uint64_t hi = u; - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_and(f0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f110 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f01 = f010; - Lib_IntVector_Intrinsics_vec256 f111 = f110; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f41 = f40; - e[0U] = f01; - e[1U] = f111; - e[2U] = f2; - e[3U] = f3; - e[4U] = f41; - uint64_t b = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); - Lib_IntVector_Intrinsics_vec256 *r = pre; - Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; - Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); - Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); - Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); - Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); - Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); - Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); - Lib_IntVector_Intrinsics_vec256 - a06 = - Lib_IntVector_Intrinsics_vec256_add64(a05, - Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); - Lib_IntVector_Intrinsics_vec256 - a16 = - Lib_IntVector_Intrinsics_vec256_add64(a15, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); - Lib_IntVector_Intrinsics_vec256 - a26 = - Lib_IntVector_Intrinsics_vec256_add64(a25, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); - Lib_IntVector_Intrinsics_vec256 - a36 = - Lib_IntVector_Intrinsics_vec256_add64(a35, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); - Lib_IntVector_Intrinsics_vec256 - a46 = - Lib_IntVector_Intrinsics_vec256_add64(a45, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); - Lib_IntVector_Intrinsics_vec256 t0 = a06; - Lib_IntVector_Intrinsics_vec256 t1 = a16; - Lib_IntVector_Intrinsics_vec256 t2 = a26; - Lib_IntVector_Intrinsics_vec256 t3 = a36; - Lib_IntVector_Intrinsics_vec256 t4 = a46; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t0, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - acc[0U] = o0; - acc[1U] = o1; - acc[2U] = o2; - acc[3U] = o3; - acc[4U] = o4; -} - -void -Hacl_Poly1305_256_poly1305_update( - Lib_IntVector_Intrinsics_vec256 *ctx, - uint32_t len, - uint8_t *text) -{ - Lib_IntVector_Intrinsics_vec256 *pre = ctx + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 *acc = ctx; - uint32_t sz_block = (uint32_t)64U; - uint32_t len0 = len / sz_block * sz_block; - uint8_t *t0 = text; - if (len0 > (uint32_t)0U) { - uint32_t bs = (uint32_t)64U; - uint8_t *text0 = t0; - Hacl_Impl_Poly1305_Field32xN_256_load_acc4(acc, text0); - uint32_t len1 = len0 - bs; - uint8_t *text1 = t0 + bs; - uint32_t nb = len1 / bs; - for (uint32_t i = (uint32_t)0U; i < nb; i++) { - uint8_t *block = text1 + i * bs; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - Lib_IntVector_Intrinsics_vec256 lo = Lib_IntVector_Intrinsics_vec256_load_le(block); - Lib_IntVector_Intrinsics_vec256 - hi = Lib_IntVector_Intrinsics_vec256_load_le(block + (uint32_t)32U); - Lib_IntVector_Intrinsics_vec256 - mask2610 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - m0 = Lib_IntVector_Intrinsics_vec256_interleave_low128(lo, hi); - Lib_IntVector_Intrinsics_vec256 - m1 = Lib_IntVector_Intrinsics_vec256_interleave_high128(lo, hi); - Lib_IntVector_Intrinsics_vec256 - m2 = Lib_IntVector_Intrinsics_vec256_shift_right(m0, (uint32_t)48U); - Lib_IntVector_Intrinsics_vec256 - m3 = Lib_IntVector_Intrinsics_vec256_shift_right(m1, (uint32_t)48U); - Lib_IntVector_Intrinsics_vec256 - m4 = Lib_IntVector_Intrinsics_vec256_interleave_high64(m0, m1); - Lib_IntVector_Intrinsics_vec256 - t010 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m0, m1); - Lib_IntVector_Intrinsics_vec256 - t30 = Lib_IntVector_Intrinsics_vec256_interleave_low64(m2, m3); - Lib_IntVector_Intrinsics_vec256 - t20 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)4U); - Lib_IntVector_Intrinsics_vec256 o20 = Lib_IntVector_Intrinsics_vec256_and(t20, mask2610); - Lib_IntVector_Intrinsics_vec256 - t10 = Lib_IntVector_Intrinsics_vec256_shift_right64(t010, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 o10 = Lib_IntVector_Intrinsics_vec256_and(t10, mask2610); - Lib_IntVector_Intrinsics_vec256 o5 = Lib_IntVector_Intrinsics_vec256_and(t010, mask2610); - Lib_IntVector_Intrinsics_vec256 - t31 = Lib_IntVector_Intrinsics_vec256_shift_right64(t30, (uint32_t)30U); - Lib_IntVector_Intrinsics_vec256 o30 = Lib_IntVector_Intrinsics_vec256_and(t31, mask2610); - Lib_IntVector_Intrinsics_vec256 - o40 = Lib_IntVector_Intrinsics_vec256_shift_right64(m4, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 o00 = o5; - Lib_IntVector_Intrinsics_vec256 o11 = o10; - Lib_IntVector_Intrinsics_vec256 o21 = o20; - Lib_IntVector_Intrinsics_vec256 o31 = o30; - Lib_IntVector_Intrinsics_vec256 o41 = o40; - e[0U] = o00; - e[1U] = o11; - e[2U] = o21; - e[3U] = o31; - e[4U] = o41; - uint64_t b = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); - Lib_IntVector_Intrinsics_vec256 *rn = pre + (uint32_t)10U; - Lib_IntVector_Intrinsics_vec256 *rn5 = pre + (uint32_t)15U; - Lib_IntVector_Intrinsics_vec256 r0 = rn[0U]; - Lib_IntVector_Intrinsics_vec256 r1 = rn[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = rn[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = rn[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = rn[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = rn5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = rn5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = rn5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = rn5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 f110 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 f120 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 f130 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 f140 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = Lib_IntVector_Intrinsics_vec256_mul64(r0, f10); - Lib_IntVector_Intrinsics_vec256 a1 = Lib_IntVector_Intrinsics_vec256_mul64(r1, f10); - Lib_IntVector_Intrinsics_vec256 a2 = Lib_IntVector_Intrinsics_vec256_mul64(r2, f10); - Lib_IntVector_Intrinsics_vec256 a3 = Lib_IntVector_Intrinsics_vec256_mul64(r3, f10); - Lib_IntVector_Intrinsics_vec256 a4 = Lib_IntVector_Intrinsics_vec256_mul64(r4, f10); - Lib_IntVector_Intrinsics_vec256 - a01 = - Lib_IntVector_Intrinsics_vec256_add64(a0, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f110)); - Lib_IntVector_Intrinsics_vec256 - a11 = - Lib_IntVector_Intrinsics_vec256_add64(a1, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f110)); - Lib_IntVector_Intrinsics_vec256 - a21 = - Lib_IntVector_Intrinsics_vec256_add64(a2, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f110)); - Lib_IntVector_Intrinsics_vec256 - a31 = - Lib_IntVector_Intrinsics_vec256_add64(a3, - Lib_IntVector_Intrinsics_vec256_mul64(r2, f110)); - Lib_IntVector_Intrinsics_vec256 - a41 = - Lib_IntVector_Intrinsics_vec256_add64(a4, - Lib_IntVector_Intrinsics_vec256_mul64(r3, f110)); - Lib_IntVector_Intrinsics_vec256 - a02 = - Lib_IntVector_Intrinsics_vec256_add64(a01, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f120)); - Lib_IntVector_Intrinsics_vec256 - a12 = - Lib_IntVector_Intrinsics_vec256_add64(a11, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f120)); - Lib_IntVector_Intrinsics_vec256 - a22 = - Lib_IntVector_Intrinsics_vec256_add64(a21, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f120)); - Lib_IntVector_Intrinsics_vec256 - a32 = - Lib_IntVector_Intrinsics_vec256_add64(a31, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f120)); - Lib_IntVector_Intrinsics_vec256 - a42 = - Lib_IntVector_Intrinsics_vec256_add64(a41, - Lib_IntVector_Intrinsics_vec256_mul64(r2, f120)); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r52, f130)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f130)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f130)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f130)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r1, f130)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r51, f140)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r52, f140)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r53, f140)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r54, f140)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r0, f140)); - Lib_IntVector_Intrinsics_vec256 t01 = a04; - Lib_IntVector_Intrinsics_vec256 t1 = a14; - Lib_IntVector_Intrinsics_vec256 t2 = a24; - Lib_IntVector_Intrinsics_vec256 t3 = a34; - Lib_IntVector_Intrinsics_vec256 t4 = a44; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t1, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o01 = x02; - Lib_IntVector_Intrinsics_vec256 o12 = x12; - Lib_IntVector_Intrinsics_vec256 o22 = x21; - Lib_IntVector_Intrinsics_vec256 o32 = x32; - Lib_IntVector_Intrinsics_vec256 o42 = x42; - acc[0U] = o01; - acc[1U] = o12; - acc[2U] = o22; - acc[3U] = o32; - acc[4U] = o42; - Lib_IntVector_Intrinsics_vec256 f100 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 f20 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f21 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f22 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f23 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f24 = e[4U]; - Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_add64(f100, f20); - Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_add64(f11, f21); - Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_add64(f12, f22); - Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_add64(f13, f23); - Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_add64(f14, f24); - acc[0U] = o0; - acc[1U] = o1; - acc[2U] = o2; - acc[3U] = o3; - acc[4U] = o4; - } - Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize(acc, pre); - } - uint32_t len1 = len - len0; - uint8_t *t1 = text + len0; - uint32_t nb = len1 / (uint32_t)16U; - uint32_t rem1 = len1 % (uint32_t)16U; - for (uint32_t i = (uint32_t)0U; i < nb; i++) { - uint8_t *block = t1 + i * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint64_t u0 = load64_le(block); - uint64_t lo = u0; - uint64_t u = load64_le(block + (uint32_t)8U); - uint64_t hi = u; - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_and(f0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f110 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f01 = f010; - Lib_IntVector_Intrinsics_vec256 f111 = f110; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f41 = f40; - e[0U] = f01; - e[1U] = f111; - e[2U] = f2; - e[3U] = f3; - e[4U] = f41; - uint64_t b = (uint64_t)0x1000000U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 f4 = e[4U]; - e[4U] = Lib_IntVector_Intrinsics_vec256_or(f4, mask); - Lib_IntVector_Intrinsics_vec256 *r = pre; - Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; - Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); - Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); - Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); - Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); - Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); - Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); - Lib_IntVector_Intrinsics_vec256 - a06 = - Lib_IntVector_Intrinsics_vec256_add64(a05, - Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); - Lib_IntVector_Intrinsics_vec256 - a16 = - Lib_IntVector_Intrinsics_vec256_add64(a15, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); - Lib_IntVector_Intrinsics_vec256 - a26 = - Lib_IntVector_Intrinsics_vec256_add64(a25, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); - Lib_IntVector_Intrinsics_vec256 - a36 = - Lib_IntVector_Intrinsics_vec256_add64(a35, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); - Lib_IntVector_Intrinsics_vec256 - a46 = - Lib_IntVector_Intrinsics_vec256_add64(a45, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); - Lib_IntVector_Intrinsics_vec256 t01 = a06; - Lib_IntVector_Intrinsics_vec256 t11 = a16; - Lib_IntVector_Intrinsics_vec256 t2 = a26; - Lib_IntVector_Intrinsics_vec256 t3 = a36; - Lib_IntVector_Intrinsics_vec256 t4 = a46; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - acc[0U] = o0; - acc[1U] = o1; - acc[2U] = o2; - acc[3U] = o3; - acc[4U] = o4; - } - if (rem1 > (uint32_t)0U) { - uint8_t *last1 = t1 + nb * (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 e[5U]; - for (uint32_t _i = 0U; _i < (uint32_t)5U; ++_i) - e[_i] = Lib_IntVector_Intrinsics_vec256_zero; - uint8_t tmp[16U] = { 0U }; - memcpy(tmp, last1, rem1 * sizeof(last1[0U])); - uint64_t u0 = load64_le(tmp); - uint64_t lo = u0; - uint64_t u = load64_le(tmp + (uint32_t)8U); - uint64_t hi = u; - Lib_IntVector_Intrinsics_vec256 f0 = Lib_IntVector_Intrinsics_vec256_load64(lo); - Lib_IntVector_Intrinsics_vec256 f1 = Lib_IntVector_Intrinsics_vec256_load64(hi); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_and(f0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f110 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)26U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f20 = - Lib_IntVector_Intrinsics_vec256_or(Lib_IntVector_Intrinsics_vec256_shift_right64(f0, - (uint32_t)52U), - Lib_IntVector_Intrinsics_vec256_shift_left64(Lib_IntVector_Intrinsics_vec256_and(f1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffU)), - (uint32_t)12U)); - Lib_IntVector_Intrinsics_vec256 - f30 = - Lib_IntVector_Intrinsics_vec256_and(Lib_IntVector_Intrinsics_vec256_shift_right64(f1, - (uint32_t)14U), - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - f40 = Lib_IntVector_Intrinsics_vec256_shift_right64(f1, (uint32_t)40U); - Lib_IntVector_Intrinsics_vec256 f01 = f010; - Lib_IntVector_Intrinsics_vec256 f111 = f110; - Lib_IntVector_Intrinsics_vec256 f2 = f20; - Lib_IntVector_Intrinsics_vec256 f3 = f30; - Lib_IntVector_Intrinsics_vec256 f4 = f40; - e[0U] = f01; - e[1U] = f111; - e[2U] = f2; - e[3U] = f3; - e[4U] = f4; - uint64_t b = (uint64_t)1U << rem1 * (uint32_t)8U % (uint32_t)26U; - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_load64(b); - Lib_IntVector_Intrinsics_vec256 fi = e[rem1 * (uint32_t)8U / (uint32_t)26U]; - e[rem1 * (uint32_t)8U / (uint32_t)26U] = Lib_IntVector_Intrinsics_vec256_or(fi, mask); - Lib_IntVector_Intrinsics_vec256 *r = pre; - Lib_IntVector_Intrinsics_vec256 *r5 = pre + (uint32_t)5U; - Lib_IntVector_Intrinsics_vec256 r0 = r[0U]; - Lib_IntVector_Intrinsics_vec256 r1 = r[1U]; - Lib_IntVector_Intrinsics_vec256 r2 = r[2U]; - Lib_IntVector_Intrinsics_vec256 r3 = r[3U]; - Lib_IntVector_Intrinsics_vec256 r4 = r[4U]; - Lib_IntVector_Intrinsics_vec256 r51 = r5[1U]; - Lib_IntVector_Intrinsics_vec256 r52 = r5[2U]; - Lib_IntVector_Intrinsics_vec256 r53 = r5[3U]; - Lib_IntVector_Intrinsics_vec256 r54 = r5[4U]; - Lib_IntVector_Intrinsics_vec256 f10 = e[0U]; - Lib_IntVector_Intrinsics_vec256 f11 = e[1U]; - Lib_IntVector_Intrinsics_vec256 f12 = e[2U]; - Lib_IntVector_Intrinsics_vec256 f13 = e[3U]; - Lib_IntVector_Intrinsics_vec256 f14 = e[4U]; - Lib_IntVector_Intrinsics_vec256 a0 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 a1 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 a2 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 a3 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 a4 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 a01 = Lib_IntVector_Intrinsics_vec256_add64(a0, f10); - Lib_IntVector_Intrinsics_vec256 a11 = Lib_IntVector_Intrinsics_vec256_add64(a1, f11); - Lib_IntVector_Intrinsics_vec256 a21 = Lib_IntVector_Intrinsics_vec256_add64(a2, f12); - Lib_IntVector_Intrinsics_vec256 a31 = Lib_IntVector_Intrinsics_vec256_add64(a3, f13); - Lib_IntVector_Intrinsics_vec256 a41 = Lib_IntVector_Intrinsics_vec256_add64(a4, f14); - Lib_IntVector_Intrinsics_vec256 a02 = Lib_IntVector_Intrinsics_vec256_mul64(r0, a01); - Lib_IntVector_Intrinsics_vec256 a12 = Lib_IntVector_Intrinsics_vec256_mul64(r1, a01); - Lib_IntVector_Intrinsics_vec256 a22 = Lib_IntVector_Intrinsics_vec256_mul64(r2, a01); - Lib_IntVector_Intrinsics_vec256 a32 = Lib_IntVector_Intrinsics_vec256_mul64(r3, a01); - Lib_IntVector_Intrinsics_vec256 a42 = Lib_IntVector_Intrinsics_vec256_mul64(r4, a01); - Lib_IntVector_Intrinsics_vec256 - a03 = - Lib_IntVector_Intrinsics_vec256_add64(a02, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a11)); - Lib_IntVector_Intrinsics_vec256 - a13 = - Lib_IntVector_Intrinsics_vec256_add64(a12, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a11)); - Lib_IntVector_Intrinsics_vec256 - a23 = - Lib_IntVector_Intrinsics_vec256_add64(a22, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a11)); - Lib_IntVector_Intrinsics_vec256 - a33 = - Lib_IntVector_Intrinsics_vec256_add64(a32, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a11)); - Lib_IntVector_Intrinsics_vec256 - a43 = - Lib_IntVector_Intrinsics_vec256_add64(a42, - Lib_IntVector_Intrinsics_vec256_mul64(r3, a11)); - Lib_IntVector_Intrinsics_vec256 - a04 = - Lib_IntVector_Intrinsics_vec256_add64(a03, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a21)); - Lib_IntVector_Intrinsics_vec256 - a14 = - Lib_IntVector_Intrinsics_vec256_add64(a13, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a21)); - Lib_IntVector_Intrinsics_vec256 - a24 = - Lib_IntVector_Intrinsics_vec256_add64(a23, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a21)); - Lib_IntVector_Intrinsics_vec256 - a34 = - Lib_IntVector_Intrinsics_vec256_add64(a33, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a21)); - Lib_IntVector_Intrinsics_vec256 - a44 = - Lib_IntVector_Intrinsics_vec256_add64(a43, - Lib_IntVector_Intrinsics_vec256_mul64(r2, a21)); - Lib_IntVector_Intrinsics_vec256 - a05 = - Lib_IntVector_Intrinsics_vec256_add64(a04, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a31)); - Lib_IntVector_Intrinsics_vec256 - a15 = - Lib_IntVector_Intrinsics_vec256_add64(a14, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a31)); - Lib_IntVector_Intrinsics_vec256 - a25 = - Lib_IntVector_Intrinsics_vec256_add64(a24, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a31)); - Lib_IntVector_Intrinsics_vec256 - a35 = - Lib_IntVector_Intrinsics_vec256_add64(a34, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a31)); - Lib_IntVector_Intrinsics_vec256 - a45 = - Lib_IntVector_Intrinsics_vec256_add64(a44, - Lib_IntVector_Intrinsics_vec256_mul64(r1, a31)); - Lib_IntVector_Intrinsics_vec256 - a06 = - Lib_IntVector_Intrinsics_vec256_add64(a05, - Lib_IntVector_Intrinsics_vec256_mul64(r51, a41)); - Lib_IntVector_Intrinsics_vec256 - a16 = - Lib_IntVector_Intrinsics_vec256_add64(a15, - Lib_IntVector_Intrinsics_vec256_mul64(r52, a41)); - Lib_IntVector_Intrinsics_vec256 - a26 = - Lib_IntVector_Intrinsics_vec256_add64(a25, - Lib_IntVector_Intrinsics_vec256_mul64(r53, a41)); - Lib_IntVector_Intrinsics_vec256 - a36 = - Lib_IntVector_Intrinsics_vec256_add64(a35, - Lib_IntVector_Intrinsics_vec256_mul64(r54, a41)); - Lib_IntVector_Intrinsics_vec256 - a46 = - Lib_IntVector_Intrinsics_vec256_add64(a45, - Lib_IntVector_Intrinsics_vec256_mul64(r0, a41)); - Lib_IntVector_Intrinsics_vec256 t01 = a06; - Lib_IntVector_Intrinsics_vec256 t11 = a16; - Lib_IntVector_Intrinsics_vec256 t2 = a26; - Lib_IntVector_Intrinsics_vec256 t3 = a36; - Lib_IntVector_Intrinsics_vec256 t4 = a46; - Lib_IntVector_Intrinsics_vec256 - mask261 = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - z0 = Lib_IntVector_Intrinsics_vec256_shift_right64(t01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z1 = Lib_IntVector_Intrinsics_vec256_shift_right64(t3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x0 = Lib_IntVector_Intrinsics_vec256_and(t01, mask261); - Lib_IntVector_Intrinsics_vec256 x3 = Lib_IntVector_Intrinsics_vec256_and(t3, mask261); - Lib_IntVector_Intrinsics_vec256 x1 = Lib_IntVector_Intrinsics_vec256_add64(t11, z0); - Lib_IntVector_Intrinsics_vec256 x4 = Lib_IntVector_Intrinsics_vec256_add64(t4, z1); - Lib_IntVector_Intrinsics_vec256 - z01 = Lib_IntVector_Intrinsics_vec256_shift_right64(x1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z11 = Lib_IntVector_Intrinsics_vec256_shift_right64(x4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - t = Lib_IntVector_Intrinsics_vec256_shift_left64(z11, (uint32_t)2U); - Lib_IntVector_Intrinsics_vec256 z12 = Lib_IntVector_Intrinsics_vec256_add64(z11, t); - Lib_IntVector_Intrinsics_vec256 x11 = Lib_IntVector_Intrinsics_vec256_and(x1, mask261); - Lib_IntVector_Intrinsics_vec256 x41 = Lib_IntVector_Intrinsics_vec256_and(x4, mask261); - Lib_IntVector_Intrinsics_vec256 x2 = Lib_IntVector_Intrinsics_vec256_add64(t2, z01); - Lib_IntVector_Intrinsics_vec256 x01 = Lib_IntVector_Intrinsics_vec256_add64(x0, z12); - Lib_IntVector_Intrinsics_vec256 - z02 = Lib_IntVector_Intrinsics_vec256_shift_right64(x2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - z13 = Lib_IntVector_Intrinsics_vec256_shift_right64(x01, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x21 = Lib_IntVector_Intrinsics_vec256_and(x2, mask261); - Lib_IntVector_Intrinsics_vec256 x02 = Lib_IntVector_Intrinsics_vec256_and(x01, mask261); - Lib_IntVector_Intrinsics_vec256 x31 = Lib_IntVector_Intrinsics_vec256_add64(x3, z02); - Lib_IntVector_Intrinsics_vec256 x12 = Lib_IntVector_Intrinsics_vec256_add64(x11, z13); - Lib_IntVector_Intrinsics_vec256 - z03 = Lib_IntVector_Intrinsics_vec256_shift_right64(x31, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 x32 = Lib_IntVector_Intrinsics_vec256_and(x31, mask261); - Lib_IntVector_Intrinsics_vec256 x42 = Lib_IntVector_Intrinsics_vec256_add64(x41, z03); - Lib_IntVector_Intrinsics_vec256 o0 = x02; - Lib_IntVector_Intrinsics_vec256 o1 = x12; - Lib_IntVector_Intrinsics_vec256 o2 = x21; - Lib_IntVector_Intrinsics_vec256 o3 = x32; - Lib_IntVector_Intrinsics_vec256 o4 = x42; - acc[0U] = o0; - acc[1U] = o1; - acc[2U] = o2; - acc[3U] = o3; - acc[4U] = o4; - return; - } -} - -void -Hacl_Poly1305_256_poly1305_finish( - uint8_t *tag, - uint8_t *key, - Lib_IntVector_Intrinsics_vec256 *ctx) -{ - Lib_IntVector_Intrinsics_vec256 *acc = ctx; - uint8_t *ks = key + (uint32_t)16U; - Lib_IntVector_Intrinsics_vec256 f0 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 f13 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 f23 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 f33 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 f40 = acc[4U]; - Lib_IntVector_Intrinsics_vec256 - l0 = Lib_IntVector_Intrinsics_vec256_add64(f0, Lib_IntVector_Intrinsics_vec256_zero); - Lib_IntVector_Intrinsics_vec256 - tmp00 = - Lib_IntVector_Intrinsics_vec256_and(l0, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c00 = Lib_IntVector_Intrinsics_vec256_shift_right64(l0, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l1 = Lib_IntVector_Intrinsics_vec256_add64(f13, c00); - Lib_IntVector_Intrinsics_vec256 - tmp10 = - Lib_IntVector_Intrinsics_vec256_and(l1, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c10 = Lib_IntVector_Intrinsics_vec256_shift_right64(l1, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l2 = Lib_IntVector_Intrinsics_vec256_add64(f23, c10); - Lib_IntVector_Intrinsics_vec256 - tmp20 = - Lib_IntVector_Intrinsics_vec256_and(l2, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c20 = Lib_IntVector_Intrinsics_vec256_shift_right64(l2, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l3 = Lib_IntVector_Intrinsics_vec256_add64(f33, c20); - Lib_IntVector_Intrinsics_vec256 - tmp30 = - Lib_IntVector_Intrinsics_vec256_and(l3, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c30 = Lib_IntVector_Intrinsics_vec256_shift_right64(l3, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l4 = Lib_IntVector_Intrinsics_vec256_add64(f40, c30); - Lib_IntVector_Intrinsics_vec256 - tmp40 = - Lib_IntVector_Intrinsics_vec256_and(l4, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c40 = Lib_IntVector_Intrinsics_vec256_shift_right64(l4, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - f010 = - Lib_IntVector_Intrinsics_vec256_add64(tmp00, - Lib_IntVector_Intrinsics_vec256_smul64(c40, (uint64_t)5U)); - Lib_IntVector_Intrinsics_vec256 f110 = tmp10; - Lib_IntVector_Intrinsics_vec256 f210 = tmp20; - Lib_IntVector_Intrinsics_vec256 f310 = tmp30; - Lib_IntVector_Intrinsics_vec256 f410 = tmp40; - Lib_IntVector_Intrinsics_vec256 - l = Lib_IntVector_Intrinsics_vec256_add64(f010, Lib_IntVector_Intrinsics_vec256_zero); - Lib_IntVector_Intrinsics_vec256 - tmp0 = - Lib_IntVector_Intrinsics_vec256_and(l, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c0 = Lib_IntVector_Intrinsics_vec256_shift_right64(l, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l5 = Lib_IntVector_Intrinsics_vec256_add64(f110, c0); - Lib_IntVector_Intrinsics_vec256 - tmp1 = - Lib_IntVector_Intrinsics_vec256_and(l5, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c1 = Lib_IntVector_Intrinsics_vec256_shift_right64(l5, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l6 = Lib_IntVector_Intrinsics_vec256_add64(f210, c1); - Lib_IntVector_Intrinsics_vec256 - tmp2 = - Lib_IntVector_Intrinsics_vec256_and(l6, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c2 = Lib_IntVector_Intrinsics_vec256_shift_right64(l6, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l7 = Lib_IntVector_Intrinsics_vec256_add64(f310, c2); - Lib_IntVector_Intrinsics_vec256 - tmp3 = - Lib_IntVector_Intrinsics_vec256_and(l7, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c3 = Lib_IntVector_Intrinsics_vec256_shift_right64(l7, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 l8 = Lib_IntVector_Intrinsics_vec256_add64(f410, c3); - Lib_IntVector_Intrinsics_vec256 - tmp4 = - Lib_IntVector_Intrinsics_vec256_and(l8, - Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU)); - Lib_IntVector_Intrinsics_vec256 - c4 = Lib_IntVector_Intrinsics_vec256_shift_right64(l8, (uint32_t)26U); - Lib_IntVector_Intrinsics_vec256 - f02 = - Lib_IntVector_Intrinsics_vec256_add64(tmp0, - Lib_IntVector_Intrinsics_vec256_smul64(c4, (uint64_t)5U)); - Lib_IntVector_Intrinsics_vec256 f12 = tmp1; - Lib_IntVector_Intrinsics_vec256 f22 = tmp2; - Lib_IntVector_Intrinsics_vec256 f32 = tmp3; - Lib_IntVector_Intrinsics_vec256 f42 = tmp4; - Lib_IntVector_Intrinsics_vec256 - mh = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3ffffffU); - Lib_IntVector_Intrinsics_vec256 - ml = Lib_IntVector_Intrinsics_vec256_load64((uint64_t)0x3fffffbU); - Lib_IntVector_Intrinsics_vec256 mask = Lib_IntVector_Intrinsics_vec256_eq64(f42, mh); - Lib_IntVector_Intrinsics_vec256 - mask1 = - Lib_IntVector_Intrinsics_vec256_and(mask, - Lib_IntVector_Intrinsics_vec256_eq64(f32, mh)); - Lib_IntVector_Intrinsics_vec256 - mask2 = - Lib_IntVector_Intrinsics_vec256_and(mask1, - Lib_IntVector_Intrinsics_vec256_eq64(f22, mh)); - Lib_IntVector_Intrinsics_vec256 - mask3 = - Lib_IntVector_Intrinsics_vec256_and(mask2, - Lib_IntVector_Intrinsics_vec256_eq64(f12, mh)); - Lib_IntVector_Intrinsics_vec256 - mask4 = - Lib_IntVector_Intrinsics_vec256_and(mask3, - Lib_IntVector_Intrinsics_vec256_lognot(Lib_IntVector_Intrinsics_vec256_gt64(ml, f02))); - Lib_IntVector_Intrinsics_vec256 ph = Lib_IntVector_Intrinsics_vec256_and(mask4, mh); - Lib_IntVector_Intrinsics_vec256 pl = Lib_IntVector_Intrinsics_vec256_and(mask4, ml); - Lib_IntVector_Intrinsics_vec256 o0 = Lib_IntVector_Intrinsics_vec256_sub64(f02, pl); - Lib_IntVector_Intrinsics_vec256 o1 = Lib_IntVector_Intrinsics_vec256_sub64(f12, ph); - Lib_IntVector_Intrinsics_vec256 o2 = Lib_IntVector_Intrinsics_vec256_sub64(f22, ph); - Lib_IntVector_Intrinsics_vec256 o3 = Lib_IntVector_Intrinsics_vec256_sub64(f32, ph); - Lib_IntVector_Intrinsics_vec256 o4 = Lib_IntVector_Intrinsics_vec256_sub64(f42, ph); - Lib_IntVector_Intrinsics_vec256 f011 = o0; - Lib_IntVector_Intrinsics_vec256 f111 = o1; - Lib_IntVector_Intrinsics_vec256 f211 = o2; - Lib_IntVector_Intrinsics_vec256 f311 = o3; - Lib_IntVector_Intrinsics_vec256 f411 = o4; - acc[0U] = f011; - acc[1U] = f111; - acc[2U] = f211; - acc[3U] = f311; - acc[4U] = f411; - Lib_IntVector_Intrinsics_vec256 f00 = acc[0U]; - Lib_IntVector_Intrinsics_vec256 f1 = acc[1U]; - Lib_IntVector_Intrinsics_vec256 f2 = acc[2U]; - Lib_IntVector_Intrinsics_vec256 f3 = acc[3U]; - Lib_IntVector_Intrinsics_vec256 f4 = acc[4U]; - uint64_t f01 = Lib_IntVector_Intrinsics_vec256_extract64(f00, (uint32_t)0U); - uint64_t f112 = Lib_IntVector_Intrinsics_vec256_extract64(f1, (uint32_t)0U); - uint64_t f212 = Lib_IntVector_Intrinsics_vec256_extract64(f2, (uint32_t)0U); - uint64_t f312 = Lib_IntVector_Intrinsics_vec256_extract64(f3, (uint32_t)0U); - uint64_t f41 = Lib_IntVector_Intrinsics_vec256_extract64(f4, (uint32_t)0U); - uint64_t lo = (f01 | f112 << (uint32_t)26U) | f212 << (uint32_t)52U; - uint64_t hi = (f212 >> (uint32_t)12U | f312 << (uint32_t)14U) | f41 << (uint32_t)40U; - uint64_t f10 = lo; - uint64_t f11 = hi; - uint64_t u0 = load64_le(ks); - uint64_t lo0 = u0; - uint64_t u = load64_le(ks + (uint32_t)8U); - uint64_t hi0 = u; - uint64_t f20 = lo0; - uint64_t f21 = hi0; - uint64_t r0 = f10 + f20; - uint64_t r1 = f11 + f21; - uint64_t c = (r0 ^ ((r0 ^ f20) | ((r0 - f20) ^ f20))) >> (uint32_t)63U; - uint64_t r11 = r1 + c; - uint64_t f30 = r0; - uint64_t f31 = r11; - store64_le(tag, f30); - store64_le(tag + (uint32_t)8U, f31); -} - -void -Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key) -{ - Lib_IntVector_Intrinsics_vec256 ctx[25U]; - for (uint32_t _i = 0U; _i < (uint32_t)25U; ++_i) - ctx[_i] = Lib_IntVector_Intrinsics_vec256_zero; - Hacl_Poly1305_256_poly1305_init(ctx, key); - Hacl_Poly1305_256_poly1305_update(ctx, len, text); - Hacl_Poly1305_256_poly1305_finish(tag, key, ctx); -} diff --git a/lib/freebl/verified/Hacl_Poly1305_256.h b/lib/freebl/verified/Hacl_Poly1305_256.h deleted file mode 100644 index 9d5ff5728..000000000 --- a/lib/freebl/verified/Hacl_Poly1305_256.h +++ /dev/null @@ -1,66 +0,0 @@ -/* MIT License - * - * Copyright (c) 2016-2020 INRIA, CMU and Microsoft Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to deal - * in the Software without restriction, including without limitation the rights - * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - * copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "libintvector.h" -#include "kremlin/internal/types.h" -#include "kremlin/lowstar_endianness.h" -#include <string.h> -#include <stdbool.h> - -#ifndef __Hacl_Poly1305_256_H -#define __Hacl_Poly1305_256_H - -#include "Hacl_Kremlib.h" - -void -Hacl_Impl_Poly1305_Field32xN_256_load_acc4(Lib_IntVector_Intrinsics_vec256 *acc, uint8_t *b); - -void -Hacl_Impl_Poly1305_Field32xN_256_fmul_r4_normalize( - Lib_IntVector_Intrinsics_vec256 *out, - Lib_IntVector_Intrinsics_vec256 *p); - -extern uint32_t Hacl_Poly1305_256_blocklen; - -typedef Lib_IntVector_Intrinsics_vec256 *Hacl_Poly1305_256_poly1305_ctx; - -void Hacl_Poly1305_256_poly1305_init(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *key); - -void Hacl_Poly1305_256_poly1305_update1(Lib_IntVector_Intrinsics_vec256 *ctx, uint8_t *text); - -void -Hacl_Poly1305_256_poly1305_update( - Lib_IntVector_Intrinsics_vec256 *ctx, - uint32_t len, - uint8_t *text); - -void -Hacl_Poly1305_256_poly1305_finish( - uint8_t *tag, - uint8_t *key, - Lib_IntVector_Intrinsics_vec256 *ctx); - -void Hacl_Poly1305_256_poly1305_mac(uint8_t *tag, uint32_t len, uint8_t *text, uint8_t *key); - -#define __Hacl_Poly1305_256_H_DEFINED -#endif diff --git a/nss-tool/hw-support.c b/nss-tool/hw-support.c index ac6c10d90..561064c17 100644 --- a/nss-tool/hw-support.c +++ b/nss-tool/hw-support.c @@ -23,7 +23,6 @@ int main(int argc, char const *argv[]) { printf("\tAES-NI \t%s supported\n", aesni_support() ? "" : "not"); printf("\tPCLMUL \t%s supported\n", clmul_support() ? "" : "not"); printf("\tAVX \t%s supported\n", avx_support() ? "" : "not"); - printf("\tAVX2 \t%s supported\n", avx2_support() ? "" : "not"); printf("\tSSSE3 \t%s supported\n", ssse3_support() ? "" : "not"); printf("\tSSE4.1 \t%s supported\n", sse4_1_support() ? "" : "not"); printf("\tSSE4.2 \t%s supported\n", sse4_2_support() ? "" : "not"); |